# 1 CB Data Wrangling - chocolatebars

### This script contains the following points
#### 01 Import Libraries
#### 02 Import Data
#### 03 First Look at Data
#### 04 Data Wrangling
#### 05 Export Data

# 01 Import Libraries

In [1]:
# Import Libraries

import pandas as pd
import numpy as np
import os

# 02 Import Data

In [2]:
# Set a path

path = r'C:\Users\Tina\Desktop\CareerFoundry\Data Analytics Immersion\Chocolate Bar Rating'

In [3]:
# Import the "flavors_of_cacao" file

choco = pd.read_csv(os.path.join(path, '02 Data', 'Original Data', 'flavors_of_cacao.csv'), index_col = False)

# 03 First Look at Data

In [4]:
# Shape of "choco"

choco.shape

(1795, 9)

In [5]:
# First few rows of "cust"

choco.head()

Unnamed: 0,Company \n(Maker-if known),Specific Bean Origin\nor Bar Name,REF,Review\nDate,Cocoa\nPercent,Company\nLocation,Rating,Bean\nType,Broad Bean\nOrigin
0,A. Morin,Agua Grande,1876,2016,63%,France,3.75,,Sao Tome
1,A. Morin,Kpime,1676,2015,70%,France,2.75,,Togo
2,A. Morin,Atsane,1676,2015,70%,France,3.0,,Togo
3,A. Morin,Akata,1680,2015,70%,France,3.5,,Togo
4,A. Morin,Quilla,1704,2015,70%,France,3.5,,Peru


In [6]:
# Data types of "choco"

choco.dtypes

Company \n(Maker-if known)            object
Specific Bean Origin\nor Bar Name     object
REF                                    int64
Review\nDate                           int64
Cocoa\nPercent                        object
Company\nLocation                     object
Rating                               float64
Bean\nType                            object
Broad Bean\nOrigin                    object
dtype: object

# 04 Data Wrangling

### 01 Drop Columns

No column will be dropped at this point.

### 02  Rename Columns

In [7]:
# Rename columns from "choco" to follow the same naming convention

In [8]:
# Create a variabel with new column names

column_names = {
    'Company \n(Maker-if known)' : 'company',
    'Specific Bean Origin\nor Bar Name' : 'bar_name',
    'REF' : 'ref',
    'Review\nDate' : 'review_date',
    'Cocoa\nPercent' : 'cocoa_percent',
    'Company\nLocation' : 'company_location',
    'Rating' : 'rating',
    'Bean\nType' : 'bean_type',
    'Broad Bean\nOrigin' : 'bean_origin'}

In [9]:
# Rename columns

choco.rename(columns = column_names, inplace=True)

In [10]:
choco.head()

Unnamed: 0,Company \n(Maker-if known),bar_name,ref,review_date,cocoa_percent,company_location,rating,bean_type,bean_origin
0,A. Morin,Agua Grande,1876,2016,63%,France,3.75,,Sao Tome
1,A. Morin,Kpime,1676,2015,70%,France,2.75,,Togo
2,A. Morin,Atsane,1676,2015,70%,France,3.0,,Togo
3,A. Morin,Akata,1680,2015,70%,France,3.5,,Togo
4,A. Morin,Quilla,1704,2015,70%,France,3.5,,Peru


In [11]:
# The first column does not get renamed. A different approach might work better.

In [12]:
# Get the column names

c_names = choco.columns.tolist()

In [13]:
c_names

['Company\xa0\n(Maker-if known)',
 'bar_name',
 'ref',
 'review_date',
 'cocoa_percent',
 'company_location',
 'rating',
 'bean_type',
 'bean_origin']

In [14]:
# Use the column name from "c_names" to rename the "Company \n(Maker-if known) " column


column_name = {
    'Company\xa0\n(Maker-if known)' : 'company'}

In [15]:
# Rename column "Company \n(Maker-if known)"

choco.rename(columns = column_name, inplace=True)

In [16]:
choco.head()

Unnamed: 0,company,bar_name,ref,review_date,cocoa_percent,company_location,rating,bean_type,bean_origin
0,A. Morin,Agua Grande,1876,2016,63%,France,3.75,,Sao Tome
1,A. Morin,Kpime,1676,2015,70%,France,2.75,,Togo
2,A. Morin,Atsane,1676,2015,70%,France,3.0,,Togo
3,A. Morin,Akata,1680,2015,70%,France,3.5,,Togo
4,A. Morin,Quilla,1704,2015,70%,France,3.5,,Peru


### 03 Change Data Types

In [17]:
# Change the "coca_percent" column from "object" to "numeric"

In [18]:
# Remove the "%" sign

choco['cocoa_percent'] = choco['cocoa_percent'].str.rstrip('%')

In [19]:
# Change the column to numeric format

choco['cocoa_percent'] = pd.to_numeric(choco['cocoa_percent'])

In [20]:
choco.dtypes

company              object
bar_name             object
ref                   int64
review_date           int64
cocoa_percent       float64
company_location     object
rating              float64
bean_type            object
bean_origin          object
dtype: object

# 05 Export Data

In [21]:
# Export the "choco" dataframe

choco.to_csv(os.path.join(path, '02 Data', 'Prepared Data', 'wrangled_chocolate_bars.csv'))