# Summary of Analysis and Cleaning strategy 
- Review all Columns and null values 
- Change column names or lower case 
- Change column names from having '.' to '_' for easier read 
- 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Reading in Dataset 

In [2]:
# Read the data into a Pandas DataFrame
coffee_df = pd.read_csv('arabica_data_cleaned.csv')
coffee_df.head()

Unnamed: 0.1,Unnamed: 0,Species,Owner,Country.of.Origin,Farm.Name,Lot.Number,Mill,ICO.Number,Company,Altitude,...,Color,Category.Two.Defects,Expiration,Certification.Body,Certification.Address,Certification.Contact,unit_of_measurement,altitude_low_meters,altitude_high_meters,altitude_mean_meters
0,1,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,...,Green,0,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0
1,2,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,...,Green,1,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0
2,3,Arabica,grounds for health admin,Guatemala,"san marcos barrancas ""san cristobal cuch",,,,,1600 - 1800 m,...,,0,"May 31st, 2011",Specialty Coffee Association,36d0d00a3724338ba7937c52a378d085f2172daa,0878a7d4b9d35ddbf0fe2ce69a2062cceb45a660,m,1600.0,1800.0,1700.0
3,4,Arabica,yidnekachew dabessa,Ethiopia,yidnekachew dabessa coffee plantation,,wolensu,,yidnekachew debessa coffee plantation,1800-2200,...,Green,2,"March 25th, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1800.0,2200.0,2000.0
4,5,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,...,Green,2,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0


# Exploring the coffee_df 

In [3]:
# Get a brief summary of DataFrame.
coffee_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1311 entries, 0 to 1310
Data columns (total 44 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             1311 non-null   int64  
 1   Species                1311 non-null   object 
 2   Owner                  1304 non-null   object 
 3   Country.of.Origin      1310 non-null   object 
 4   Farm.Name              955 non-null    object 
 5   Lot.Number             270 non-null    object 
 6   Mill                   1001 non-null   object 
 7   ICO.Number             1165 non-null   object 
 8   Company                1102 non-null   object 
 9   Altitude               1088 non-null   object 
 10  Region                 1254 non-null   object 
 11  Producer               1081 non-null   object 
 12  Number.of.Bags         1311 non-null   int64  
 13  Bag.Weight             1311 non-null   object 
 14  In.Country.Partner     1311 non-null   object 
 15  Harv

In [4]:
# Get the columns of the DataFrame 
coffee_df.columns

Index(['Unnamed: 0', 'Species', 'Owner', 'Country.of.Origin', 'Farm.Name',
       'Lot.Number', 'Mill', 'ICO.Number', 'Company', 'Altitude', 'Region',
       'Producer', 'Number.of.Bags', 'Bag.Weight', 'In.Country.Partner',
       'Harvest.Year', 'Grading.Date', 'Owner.1', 'Variety',
       'Processing.Method', 'Aroma', 'Flavor', 'Aftertaste', 'Acidity', 'Body',
       'Balance', 'Uniformity', 'Clean.Cup', 'Sweetness', 'Cupper.Points',
       'Total.Cup.Points', 'Moisture', 'Category.One.Defects', 'Quakers',
       'Color', 'Category.Two.Defects', 'Expiration', 'Certification.Body',
       'Certification.Address', 'Certification.Contact', 'unit_of_measurement',
       'altitude_low_meters', 'altitude_high_meters', 'altitude_mean_meters'],
      dtype='object')

In [5]:
# Summary for numerical columns
print("Numerical Summary:\n")
print(coffee_df.describe())

Numerical Summary:

        Unnamed: 0  Number.of.Bags        Aroma       Flavor   Aftertaste  \
count  1311.000000     1311.000000  1311.000000  1311.000000  1311.000000   
mean    656.000763      153.887872     7.563806     7.518070     7.397696   
std     378.598733      129.733734     0.378666     0.399979     0.405119   
min       1.000000        0.000000     0.000000     0.000000     0.000000   
25%     328.500000       14.500000     7.420000     7.330000     7.250000   
50%     656.000000      175.000000     7.580000     7.580000     7.420000   
75%     983.500000      275.000000     7.750000     7.750000     7.580000   
max    1312.000000     1062.000000     8.750000     8.830000     8.670000   

           Acidity         Body      Balance   Uniformity   Clean.Cup  \
count  1311.000000  1311.000000  1311.000000  1311.000000  1311.00000   
mean      7.533112     7.517727     7.517506     9.833394     9.83312   
std       0.381599     0.359213     0.406316     0.559343     0.771

In [6]:
# Display the max rows and columns 
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 44)
coffee_df

Unnamed: 0.1,Unnamed: 0,Species,Owner,Country.of.Origin,Farm.Name,Lot.Number,Mill,ICO.Number,Company,Altitude,Region,Producer,Number.of.Bags,Bag.Weight,In.Country.Partner,Harvest.Year,Grading.Date,Owner.1,Variety,Processing.Method,Aroma,Flavor,Aftertaste,Acidity,Body,Balance,Uniformity,Clean.Cup,Sweetness,Cupper.Points,Total.Cup.Points,Moisture,Category.One.Defects,Quakers,Color,Category.Two.Defects,Expiration,Certification.Body,Certification.Address,Certification.Contact,unit_of_measurement,altitude_low_meters,altitude_high_meters,altitude_mean_meters
0,1,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014,"April 4th, 2015",metad plc,,Washed / Wet,8.67,8.83,8.67,8.75,8.50,8.42,10.00,10.00,10.00,8.75,90.58,0.12,0,0.0,Green,0,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.00,2200.00,2075.00
1,2,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014,"April 4th, 2015",metad plc,Other,Washed / Wet,8.75,8.67,8.50,8.58,8.42,8.42,10.00,10.00,10.00,8.58,89.92,0.12,0,0.0,Green,1,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.00,2200.00,2075.00
2,3,Arabica,grounds for health admin,Guatemala,"san marcos barrancas ""san cristobal cuch",,,,,1600 - 1800 m,,,5,1,Specialty Coffee Association,,"May 31st, 2010",Grounds for Health Admin,Bourbon,,8.42,8.50,8.42,8.42,8.33,8.42,10.00,10.00,10.00,9.25,89.75,0.00,0,0.0,,0,"May 31st, 2011",Specialty Coffee Association,36d0d00a3724338ba7937c52a378d085f2172daa,0878a7d4b9d35ddbf0fe2ce69a2062cceb45a660,m,1600.00,1800.00,1700.00
3,4,Arabica,yidnekachew dabessa,Ethiopia,yidnekachew dabessa coffee plantation,,wolensu,,yidnekachew debessa coffee plantation,1800-2200,oromia,Yidnekachew Dabessa Coffee Plantation,320,60 kg,METAD Agricultural Development plc,2014,"March 26th, 2015",Yidnekachew Dabessa,,Natural / Dry,8.17,8.58,8.42,8.42,8.50,8.25,10.00,10.00,10.00,8.67,89.00,0.11,0,0.0,Green,2,"March 25th, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1800.00,2200.00,2000.00
4,5,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014,"April 4th, 2015",metad plc,Other,Washed / Wet,8.25,8.50,8.25,8.50,8.42,8.33,10.00,10.00,10.00,8.58,88.83,0.12,0,0.0,Green,2,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.00,2200.00,2075.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1306,1307,Arabica,juan carlos garcia lopez,Mexico,el centenario,,"la esperanza, municipio juchique de ferrer, ve...",1104328663,terra mia,900,juchique de ferrer,JUAN CARLOS GARCÍA LOPEZ,12,1 kg,AMECAFE,2012,"September 17th, 2012",JUAN CARLOS GARCIA LOPEZ,Bourbon,Washed / Wet,7.08,6.83,6.25,7.42,7.25,6.75,10.00,0.00,10.00,6.75,68.33,0.11,0,0.0,,20,"September 17th, 2013",AMECAFE,59e396ad6e22a1c22b248f958e1da2bd8af85272,0eb4ee5b3f47b20b049548a2fd1e7d4a2b70d0a7,m,900.00,900.00,900.00
1307,1308,Arabica,myriam kaplan-pasternak,Haiti,200 farms,,coeb koperativ ekselsyo basen (350 members),,haiti coffee,~350m,"department d'artibonite , haiti",COEB Koperativ Ekselsyo Basen,1,2 kg,Specialty Coffee Association,2012,"May 24th, 2012",Myriam Kaplan-Pasternak,Typica,Natural / Dry,6.75,6.58,6.42,6.67,7.08,6.67,9.33,6.00,6.00,6.42,67.92,0.14,8,0.0,Blue-Green,16,"May 24th, 2013",Specialty Coffee Association,36d0d00a3724338ba7937c52a378d085f2172daa,0878a7d4b9d35ddbf0fe2ce69a2062cceb45a660,m,350.00,350.00,350.00
1308,1309,Arabica,"exportadora atlantic, s.a.",Nicaragua,finca las marías,017-053-0211/ 017-053-0212,beneficio atlantic condega,017-053-0211/ 017-053-0212,exportadora atlantic s.a,1100,jalapa,Teófilo Narváez,550,69 kg,Instituto Hondureño del Café,2016,"June 6th, 2017","Exportadora Atlantic, S.A.",Caturra,Other,7.25,6.58,6.33,6.25,6.42,6.08,6.00,6.00,6.00,6.17,63.08,0.13,1,0.0,Green,5,"June 6th, 2018",Instituto Hondureño del Café,b4660a57e9f8cc613ae5b8f02bfce8634c763ab4,7f521ca403540f81ec99daec7da19c2788393880,m,1100.00,1100.00,1100.00
1309,1310,Arabica,juan luis alvarado romero,Guatemala,finca el limon,,beneficio serben,11/853/165,unicafe,4650,nuevo oriente,WILLIAM ESTUARDO MARTINEZ PACHECO,275,1 kg,Asociacion Nacional Del Café,2012,"May 24th, 2012",Juan Luis Alvarado Romero,Catuai,Washed / Wet,7.50,6.67,6.67,7.67,7.33,6.67,8.00,1.33,1.33,6.67,59.83,0.10,0,0.0,Green,4,"May 24th, 2013",Asociacion Nacional Del Café,b1f20fe3a819fd6b2ee0eb8fdc3da256604f1e53,724f04ad10ed31dbb9d260f0dfd221ba48be8a95,ft,1417.32,1417.32,1417.32


# Data Cleaning - Try Removing unique values 

In [7]:
# Create an empty dictionary to store unique values for each column
unique_values = {}

# Loop through the columns and find unique values
for column in coffee_df.columns:
    unique_values[column] = coffee_df[column].unique()

# Display unique values for each column
for column, values in unique_values.items():
    print(f'Column: {column}')
    print(values)
    print('\n')

Column: Unnamed: 0
[   1    2    3 ... 1309 1310 1312]


Column: Species
['Arabica']


Column: Owner
['metad plc' 'grounds for health admin' 'yidnekachew dabessa' 'ji-ae ahn'
 'hugo valdivia' 'ethiopia commodity exchange' 'diamond enterprise plc'
 'mohammed lalo' 'cqi q coffee sample representative'
 'yunnan coffee exchange' 'essencecoffee' 'the coffee source inc.'
 'roberto licona franco' 'nucoffee' 'kabum trading company'
 'bismarck castro' 'lin, che-hao krude 林哲豪' 'nora zeas'
 'specialty coffee-korea' 'francisco a mena' 'hider abamecha'
 'daniel magu' 'kona pacific farmers cooperative' 'itdp international'
 'jacques pereira carneiro' 'jungle estate' 'great lakes coffee uganda'
 'lusso lab' 'afca' 'juan luis alvarado romero' 'kawacom uganda ltd'
 'exportadora de cafe condor s.a' 'gonzalo hernandez'
 'ibrahim hussien speciality coffee producer &export'
 'seid damtew coffee planataion' 'dane loraas' 'colbran coffeelands, ltd.'
 'atlantic specialty coffee' 'assefa belay coffee producer'

# Refining column names 
- changing all column names to lower case 
- changing all '.' to '_' for easier read 

In [8]:
# lower case all column names for ease of access 
coffee_df.columns = coffee_df.columns.str.lower()

# Replace all '.' to '_' for easier read 
coffee_df.columns = coffee_df.columns.str.replace('.','_')

# confirm all columns renamed
coffee_df.columns

  coffee_df.columns = coffee_df.columns.str.replace('.','_')


Index(['unnamed: 0', 'species', 'owner', 'country_of_origin', 'farm_name',
       'lot_number', 'mill', 'ico_number', 'company', 'altitude', 'region',
       'producer', 'number_of_bags', 'bag_weight', 'in_country_partner',
       'harvest_year', 'grading_date', 'owner_1', 'variety',
       'processing_method', 'aroma', 'flavor', 'aftertaste', 'acidity', 'body',
       'balance', 'uniformity', 'clean_cup', 'sweetness', 'cupper_points',
       'total_cup_points', 'moisture', 'category_one_defects', 'quakers',
       'color', 'category_two_defects', 'expiration', 'certification_body',
       'certification_address', 'certification_contact', 'unit_of_measurement',
       'altitude_low_meters', 'altitude_high_meters', 'altitude_mean_meters'],
      dtype='object')

In [9]:
# drop unnamed column 
coffee_df = coffee_df.drop(columns=['unnamed: 0'])

In [22]:
# drop unnecessary columns 
coffee_df = coffee_df.drop(columns=['owner', 'lot_number', 'mill', 'ico_number', 'company', 'producer', 'number_of_bags', 'bag_weight', 'in_country_partner', 'harvest_year', 'grading_date', 'owner_1', 'moisture', 'category_one_defects', 'quakers', 'color', 'category_two_defects', 'expiration', 'certification_body', 'certification_address', 'certification_contact', 'unit_of_measurement', 'altitude_mean_meters'])

KeyError: "['owner', 'lot_number', 'mill', 'ico_number', 'company', 'producer', 'number_of_bags', 'bag_weight', 'in_country_partner', 'harvest_year', 'grading_date', 'owner_1', 'category_one_defects', 'quakers', 'color', 'category_two_defects', 'expiration', 'certification_body', 'certification_address', 'certification_contact', 'unit_of_measurement', 'altitude_mean_meters'] not found in axis"

In [None]:
# check columns
coffee_df.columns

In [11]:
# Function to extract weights in kg
def extract_kg(weight_str):
    parts = weight_str.split(' ')
    if 'kg' in weight_str:
        return float(parts[0])
    elif 'lb' in weight_str:
        # Convert pounds to kilograms (1 lb = 0.453592 kg)
        return float(parts[0]) * 0.453592
    elif len(parts) >= 1:
        # If no unit specified but there is a number, assume it's in kilograms
        return float(parts[0])
    else:
        return None  # Handle other cases if needed

# Create 'Weight_kg' column by applying the function
coffee_df['weight(kg)'] = coffee_df['bag_weight'].apply(extract_kg)

# Function to extract weights in lb
def extract_lb(weight_str):
    if 'lb' in weight_str:
        return float(weight_str.split(' ')[0])
    elif 'kg' in weight_str:
        # Convert kilograms to pounds (1 kg = 2.20462 lb)
        return float(weight_str.split(' ')[0]) * 2.20462
    else:
        return None  # Handle other cases if needed

# Create 'Weight_lb' column by applying the function
coffee_df['weight(lb)'] = coffee_df['bag_weight'].apply(extract_lb)

In [12]:
# Display the DataFrame with the new columns
coffee_df.head()

Unnamed: 0,species,owner,country_of_origin,farm_name,lot_number,mill,ico_number,company,altitude,region,producer,number_of_bags,bag_weight,in_country_partner,harvest_year,grading_date,owner_1,variety,processing_method,aroma,flavor,aftertaste,...,body,balance,uniformity,clean_cup,sweetness,cupper_points,total_cup_points,moisture,category_one_defects,quakers,color,category_two_defects,expiration,certification_body,certification_address,certification_contact,unit_of_measurement,altitude_low_meters,altitude_high_meters,altitude_mean_meters,weight(kg),weight(lb)
0,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014.0,"April 4th, 2015",metad plc,,Washed / Wet,8.67,8.83,8.67,...,8.5,8.42,10.0,10.0,10.0,8.75,90.58,0.12,0,0.0,Green,0,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0,60.0,132.2772
1,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014.0,"April 4th, 2015",metad plc,Other,Washed / Wet,8.75,8.67,8.5,...,8.42,8.42,10.0,10.0,10.0,8.58,89.92,0.12,0,0.0,Green,1,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0,60.0,132.2772
2,Arabica,grounds for health admin,Guatemala,"san marcos barrancas ""san cristobal cuch",,,,,1600 - 1800 m,,,5,1,Specialty Coffee Association,,"May 31st, 2010",Grounds for Health Admin,Bourbon,,8.42,8.5,8.42,...,8.33,8.42,10.0,10.0,10.0,9.25,89.75,0.0,0,0.0,,0,"May 31st, 2011",Specialty Coffee Association,36d0d00a3724338ba7937c52a378d085f2172daa,0878a7d4b9d35ddbf0fe2ce69a2062cceb45a660,m,1600.0,1800.0,1700.0,1.0,
3,Arabica,yidnekachew dabessa,Ethiopia,yidnekachew dabessa coffee plantation,,wolensu,,yidnekachew debessa coffee plantation,1800-2200,oromia,Yidnekachew Dabessa Coffee Plantation,320,60 kg,METAD Agricultural Development plc,2014.0,"March 26th, 2015",Yidnekachew Dabessa,,Natural / Dry,8.17,8.58,8.42,...,8.5,8.25,10.0,10.0,10.0,8.67,89.0,0.11,0,0.0,Green,2,"March 25th, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1800.0,2200.0,2000.0,60.0,132.2772
4,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014.0,"April 4th, 2015",metad plc,Other,Washed / Wet,8.25,8.5,8.25,...,8.42,8.33,10.0,10.0,10.0,8.58,88.83,0.12,0,0.0,Green,2,"April 3rd, 2016",METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0,60.0,132.2772


In [13]:
# Change grading_date to datetime
from dateutil import parser

# Use apply with lambda to convert both columns to datetime format
coffee_df[['grading_date', 'expiration']] = coffee_df[['grading_date', 'expiration']].apply(lambda x: x.apply(lambda y: parser.parse(y)))

In [14]:
coffee_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1311 entries, 0 to 1310
Data columns (total 45 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   species                1311 non-null   object        
 1   owner                  1304 non-null   object        
 2   country_of_origin      1310 non-null   object        
 3   farm_name              955 non-null    object        
 4   lot_number             270 non-null    object        
 5   mill                   1001 non-null   object        
 6   ico_number             1165 non-null   object        
 7   company                1102 non-null   object        
 8   altitude               1088 non-null   object        
 9   region                 1254 non-null   object        
 10  producer               1081 non-null   object        
 11  number_of_bags         1311 non-null   int64         
 12  bag_weight             1311 non-null   object        
 13  in_

In [15]:
# View dataframe
coffee_df.head()

Unnamed: 0,species,owner,country_of_origin,farm_name,lot_number,mill,ico_number,company,altitude,region,producer,number_of_bags,bag_weight,in_country_partner,harvest_year,grading_date,owner_1,variety,processing_method,aroma,flavor,aftertaste,...,body,balance,uniformity,clean_cup,sweetness,cupper_points,total_cup_points,moisture,category_one_defects,quakers,color,category_two_defects,expiration,certification_body,certification_address,certification_contact,unit_of_measurement,altitude_low_meters,altitude_high_meters,altitude_mean_meters,weight(kg),weight(lb)
0,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014.0,2015-04-04,metad plc,,Washed / Wet,8.67,8.83,8.67,...,8.5,8.42,10.0,10.0,10.0,8.75,90.58,0.12,0,0.0,Green,0,2016-04-03,METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0,60.0,132.2772
1,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014.0,2015-04-04,metad plc,Other,Washed / Wet,8.75,8.67,8.5,...,8.42,8.42,10.0,10.0,10.0,8.58,89.92,0.12,0,0.0,Green,1,2016-04-03,METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0,60.0,132.2772
2,Arabica,grounds for health admin,Guatemala,"san marcos barrancas ""san cristobal cuch",,,,,1600 - 1800 m,,,5,1,Specialty Coffee Association,,2010-05-31,Grounds for Health Admin,Bourbon,,8.42,8.5,8.42,...,8.33,8.42,10.0,10.0,10.0,9.25,89.75,0.0,0,0.0,,0,2011-05-31,Specialty Coffee Association,36d0d00a3724338ba7937c52a378d085f2172daa,0878a7d4b9d35ddbf0fe2ce69a2062cceb45a660,m,1600.0,1800.0,1700.0,1.0,
3,Arabica,yidnekachew dabessa,Ethiopia,yidnekachew dabessa coffee plantation,,wolensu,,yidnekachew debessa coffee plantation,1800-2200,oromia,Yidnekachew Dabessa Coffee Plantation,320,60 kg,METAD Agricultural Development plc,2014.0,2015-03-26,Yidnekachew Dabessa,,Natural / Dry,8.17,8.58,8.42,...,8.5,8.25,10.0,10.0,10.0,8.67,89.0,0.11,0,0.0,Green,2,2016-03-25,METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1800.0,2200.0,2000.0,60.0,132.2772
4,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014.0,2015-04-04,metad plc,Other,Washed / Wet,8.25,8.5,8.25,...,8.42,8.33,10.0,10.0,10.0,8.58,88.83,0.12,0,0.0,Green,2,2016-04-03,METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0,60.0,132.2772


In [16]:
# Remove '\n' from the 'certification_body' column
coffee_df['certification_body'] = coffee_df['certification_body'].str.replace('\n', '')

In [18]:
coffee_df.head()

Unnamed: 0,species,owner,country_of_origin,farm_name,lot_number,mill,ico_number,company,altitude,region,producer,number_of_bags,bag_weight,in_country_partner,harvest_year,grading_date,owner_1,variety,processing_method,aroma,flavor,aftertaste,...,body,balance,uniformity,clean_cup,sweetness,cupper_points,total_cup_points,moisture,category_one_defects,quakers,color,category_two_defects,expiration,certification_body,certification_address,certification_contact,unit_of_measurement,altitude_low_meters,altitude_high_meters,altitude_mean_meters,weight(kg),weight(lb)
0,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014.0,2015-04-04,metad plc,,Washed / Wet,8.67,8.83,8.67,...,8.5,8.42,10.0,10.0,10.0,8.75,90.58,0.12,0,0.0,Green,0,2016-04-03,METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0,60.0,132.2772
1,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014.0,2015-04-04,metad plc,Other,Washed / Wet,8.75,8.67,8.5,...,8.42,8.42,10.0,10.0,10.0,8.58,89.92,0.12,0,0.0,Green,1,2016-04-03,METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0,60.0,132.2772
2,Arabica,grounds for health admin,Guatemala,"san marcos barrancas ""san cristobal cuch",,,,,1600 - 1800 m,,,5,1,Specialty Coffee Association,,2010-05-31,Grounds for Health Admin,Bourbon,,8.42,8.5,8.42,...,8.33,8.42,10.0,10.0,10.0,9.25,89.75,0.0,0,0.0,,0,2011-05-31,Specialty Coffee Association,36d0d00a3724338ba7937c52a378d085f2172daa,0878a7d4b9d35ddbf0fe2ce69a2062cceb45a660,m,1600.0,1800.0,1700.0,1.0,
3,Arabica,yidnekachew dabessa,Ethiopia,yidnekachew dabessa coffee plantation,,wolensu,,yidnekachew debessa coffee plantation,1800-2200,oromia,Yidnekachew Dabessa Coffee Plantation,320,60 kg,METAD Agricultural Development plc,2014.0,2015-03-26,Yidnekachew Dabessa,,Natural / Dry,8.17,8.58,8.42,...,8.5,8.25,10.0,10.0,10.0,8.67,89.0,0.11,0,0.0,Green,2,2016-03-25,METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1800.0,2200.0,2000.0,60.0,132.2772
4,Arabica,metad plc,Ethiopia,metad plc,,metad plc,2014/2015,metad agricultural developmet plc,1950-2200,guji-hambela,METAD PLC,300,60 kg,METAD Agricultural Development plc,2014.0,2015-04-04,metad plc,Other,Washed / Wet,8.25,8.5,8.25,...,8.42,8.33,10.0,10.0,10.0,8.58,88.83,0.12,0,0.0,Green,2,2016-04-03,METAD Agricultural Development plc,309fcf77415a3661ae83e027f7e5f05dad786e44,19fef5a731de2db57d16da10287413f5f99bc2dd,m,1950.0,2200.0,2075.0,60.0,132.2772
