In [1]:
import pandas as pd
import re
import math

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
# Load the raw flower/meaning table (semicolon-separated).
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines="warn"` keeps the same behaviour: malformed rows are skipped
# and a warning is emitted for each of them.
flower_data = pd.read_csv("General/flower.csv", on_bad_lines="warn", delimiter=";")
flower_data

Unnamed: 0,Flower,Color,Meaning
0,Acacia,,Secret love
1,Acanthus,,Art
2,Aconite,,Misanthropy
3,Agrimony,,Thankfulness
4,Aloe,,Grief
5,Almond,,Promise
6,Amaranth (Globe),,Immortal love
7,Amaranth,,Immortality [1]
8,Amaryllis,,Pride
9,Ambrosia,,Love is reciprocated


## Data cleaning

- Lowercase all the descriptions
- Delete the `[*]` reference markers from the text
- Create a separate entry for each flower color: flower + [color]
- Create a separate entry for each meaning by splitting on ','

#### 1. Lowercase all the descriptions and delete [*] references

In [4]:
# Normalise the free-text meanings:
#   1. lowercase everything,
#   2. remove bracketed reference markers such as "[1]",
#   3. trim the trailing whitespace left behind by the removal.
# Notes on the replace call:
#   * `regex=True` is required: since pandas 2.0 `str.replace` treats the
#     pattern as a literal string by default, which would remove nothing.
#   * The pattern is a raw string so the backslashes reach the regex engine
#     without triggering invalid-escape warnings.
#   * `[^\]]*` (instead of a greedy `.*`) stops at the first closing bracket,
#     so text between two markers on the same line is preserved.
flower_data["Meaning"] = (
    flower_data["Meaning"]
    .str.lower()
    .str.replace(r"\[[^\]]*\]", "", regex=True)
    .str.rstrip()
)
flower_data["Meaning"].head(10)

0             secret love
1                     art
2             misanthropy
3            thankfulness
4                   grief
5                 promise
6           immortal love
7             immortality
8                   pride
9    love is reciprocated
Name: Meaning, dtype: object

#### 2. Fill in the flower name for each flower-color entry: flower + [color]

In [5]:
# Spot-check one row: is its Flower cell a string (as opposed to a missing value)?
isinstance(flower_data["Flower"].iat[143], str)

False

In [6]:
# Rows that only describe a colour variant have an empty Flower cell; they
# belong to the most recent row that does name a flower. Propagate that name
# downwards, then append " [color]" wherever a colour is given.
#
# This is done with whole-column operations and a single column assignment
# rather than `flower_data["Flower"].iat[i] = ...` in a loop: that form is a
# chained assignment, which is not guaranteed to write back to `flower_data`
# (and never does under pandas Copy-on-Write).
#
# NOTE: run this cell once per load of `flower_data`; running it again would
# append the colour suffix a second time.

# Name carried down from the last named row ("" if no flower has been named yet).
precedent_flower = flower_data["Flower"].ffill().fillna("")

# " [color]" for rows with a colour, empty string otherwise.
has_color = flower_data["Color"].notna()
color_suffix = (" [" + flower_data["Color"].astype(str) + "]").where(has_color, "")

flower_data["Flower"] = precedent_flower + color_suffix

flower_data.tail(50)

Unnamed: 0,Flower,Color,Meaning
144,Poppy [yellow],yellow,"wealth, success"
145,Rainflower,,"i love you back, i must atone for my sins, i w..."
146,Rose [red],red,true love
147,Rose [blue],blue,"mystery, attaining the impossible, love at fir..."
148,Rose [white],white,"silence or innocence, wistfulness, virtue, pur..."
149,Rose [dried white rose],dried white rose,sorrow
150,Rose [black],black,"death, hatred, farewell, rejuvenation or rebirth"
151,Rose [yellow],yellow,"friendship, jealousy, infidelity, apology, a b..."
152,Rose [pink],pink,grace
153,Rose [dark pink],dark pink,gratitude


In [7]:
# Produce one row per (flower, meaning) pair by splitting the comma-separated
# Meaning text and exploding the resulting lists into rows.
# Rows without any meaning are dropped, and the index is renumbered from 0.
flower_data_split_meaning = (
    flower_data[["Meaning", "Flower"]]
    .assign(Meaning=flower_data["Meaning"].str.split(","))
    .explode("Meaning")
    .dropna(subset=["Meaning"])
    .reset_index(drop=True)
)

# "a, b".split(",") leaves a leading space on " b"; strip it so the exported
# meanings are clean.
flower_data_split_meaning["Meaning"] = flower_data_split_meaning["Meaning"].str.strip()

flower_data_clean = flower_data_split_meaning[["Flower", "Meaning"]]
flower_data_clean

Unnamed: 0,Flower,Meaning
0,Acacia,secret love
1,Acanthus,art
2,Aconite,misanthropy
3,Agrimony,thankfulness
4,Aloe,grief
5,Almond,promise
6,Amaranth (Globe),immortal love
7,Amaranth,immortality
8,Amaryllis,pride
9,Ambrosia,love is reciprocated


### Export the clean data to csv

In [8]:
# Write the cleaned (Flower, Meaning) table as a semicolon-separated file,
# without the row index.
flower_data_clean.to_csv(
    "General/flower_cleaned.csv",
    sep=";",
    index=False,
)