# Clean Cannabis

This notebook is the cleaning of the dataset from:
- https://www.dolthub.com/repositories/dolthub/marijuana-data/query/master?q=SELECT+*+FROM+%60leafly%60+ORDER+BY+%60strain%60+ASC%2C+%60effects%60+ASC+LIMIT+1000

In [1]:
import pandas as pd

In [2]:
file_path = r'C:\Users\Baile\Documents\DSI 24\capstone\marijuana-data.csv.csv'
weed = pd.read_csv(file_path)

In [3]:
weed.head()

Unnamed: 0,strain,type,rating,effects,flavor,description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
2,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
3,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."
4,3-Bears-Og,indica,0.0,,,3 Bears OG by Mephisto Genetics is an autoflow...


In [4]:
weed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   strain       1000 non-null   object 
 1   type         1000 non-null   object 
 2   rating       1000 non-null   float64
 3   effects      964 non-null    object 
 4   flavor       937 non-null    object 
 5   description  989 non-null    object 
dtypes: float64(1), object(5)
memory usage: 47.0+ KB


In [5]:
weed.dtypes

strain          object
type            object
rating         float64
effects         object
flavor          object
description     object
dtype: object

In [6]:
weed.columns

Index(['strain', 'type', 'rating', 'effects', 'flavor', 'description'], dtype='object')

In [7]:
weed.shape

(1000, 6)

# Filling Null Values

In [8]:
weed.isnull().sum()

strain          0
type            0
rating          0
effects        36
flavor         63
description    11
dtype: int64

## Effects Column

In [9]:
null_effects = weed[weed['effects'].isnull()]

In [10]:
null_effects

Unnamed: 0,strain,type,rating,effects,flavor,description
4,3-Bears-Og,indica,0.0,,,3 Bears OG by Mephisto Genetics is an autoflow...
15,831-Og,indica,5.0,,,"831 OG, aka Santa Cruz OG, is an indica-domina..."
52,Afrikaner,sativa,0.0,,,Afrikaner by Cannabaal and master grower Waldo...
93,Alpha-Cow,sativa,5.0,,,Alpha Cow is a sativa strain that brings toget...
106,Amnesia-Ganja-Haze,sativa,5.0,,,Amnesia Ganja Haze is another award-winning st...
153,Bad-Azz-Kush,hybrid,5.0,,,Bad Azz Kush by Barney’s Farm was created with...
185,Bermuda-Sour,hybrid,0.0,,,Bermuda Sour by Deschutes Growery is a hybrid ...
259,Blizzard-Bush,hybrid,0.0,,,Blizzard Bush by Greenpoint Seeds is a resinou...
319,Blue-Trane,hybrid,0.0,,,Blue Trane is a cut with a mysterious heritage...
349,Blueberry-Triple-Og,indica,4.0,,,"Blueberry Triple OG, also known as Triple Blue..."


In [11]:
effects_dict = {
    '3-Bears-Og': 'Relaxed, Euphoric',
    '831-Og': 'Relaxed, Hungry, Creative',
    'Afrikaner': 'Euphoric, Sleepy',
    'Alpha-Cow': 'Relaxed',
    'Amnesia-Ganja-Haze': 'Relaxed',
    'Bad-Azz-Kush': 'Relaxed, Uplifted',
    'Bermuda-Sour': 'Relaxed, Uplifted, Hungry',
    'Blizzard-Bush': 'Relaxed, Happy, Uplifted',
    'Blue-Trane': 'Relaxed, Sleepy',
    'Blueberry-Triple-Og': 'Relaxed, Happy, Uplifted, Tingly',
    'Bodega-Bubblegum': 'Uplifted, Energized',
    'Boombaye': 'Sleepy, Euphoric',
    'Bright-Moments': 'Social, Euphoric',
    'Bubba-Doja': 'Relaxed, Sleepy, Euphoric',
    'Bubba-Hash': 'Relaxed, Hungry',
    'Burnt-Cookies': 'Social, Uplifted, Happy, Tingly',
    'C3': 'Relaxed, Sleepy, Uplifted, Euphoric',
    'Cackleberry': 'Euphoric, Relaxed, Social',
    'Calyx-Og': 'Creative, Hungry, Uplifted, Relaxed',
    'Caramel-Kona-Coffee-Cookies': 'Talkative, Creative, Energetic, Uplifted',
    'Cbd-Rich-Blessing': 'Relaxed, Happy, Uplifted',
    'Cherry-Fire': 'Calm, Uplifted, Hungry, Creative',
    'Chocoholic': 'Relaxed, Hungry, Sleepy',
    'Chocolate-Meringue': 'Social, Hungry, Euphoric',
    'Chocolate-Tonic': 'Relaxed, Happy, Creative, Uplifted',
    'Cronuts': 'Uplifted, Energized, Euphoric',
    'Dirty-Old-Bastard': 'Relaxed, Happy, Social',
    'Dr-Bubbles': 'Focused, Social, Euphoric',
    'Dr-Quantum': 'Uplifted, Happy, Creative',
    'Erdpurt': 'Relaxed, Happy, Creative',
    'Fighting-Buddha': 'Creative, Social, Uplifted, Focused',
    'Frosty-Jesus': 'Relaxed, Hungry, Tingly',
    'G-Force': 'Sleepy, Relaxed',
    'Godzilla-Blood': 'Uplifted, Happy, Euphoric',
    'Golden-Soda': 'Relaxed, Happy, Uplifted',
    'Grape-Dawg': 'Relaxed, Happy, Sleepy, Hungry'
}

In [12]:
for strain, effects in effects_dict.items():
    weed.loc[weed['strain'] == strain, 'effects'] = effects

In [13]:
weed.head()

Unnamed: 0,strain,type,rating,effects,flavor,description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
2,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
3,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."
4,3-Bears-Og,indica,0.0,"Relaxed, Euphoric",,3 Bears OG by Mephisto Genetics is an autoflow...


In [14]:
weed.isnull().sum()

strain          0
type            0
rating          0
effects         0
flavor         63
description    11
dtype: int64

In [15]:
pd.set_option('display.max_rows', None)

## Flavors Column

In [16]:
null_flavors = weed[weed['flavor'].isnull()]

In [17]:
null_flavors[['strain', 'flavor']]

Unnamed: 0,strain,flavor
4,3-Bears-Og,
15,831-Og,
30,Acid-Dough,
52,Afrikaner,
85,Allen-Iverson-Og,
93,Alpha-Cow,
98,Als-Dream,
103,Amethyst,
106,Amnesia-Ganja-Haze,
123,Arabian-Gold,


In [18]:
flavors_dict = {
    '3-Bears-Og': 'Sweet, Earthy, Pungent',
    '831-Og': 'Spicy, Peppery, Citrusy',
    'Acid-Dough': 'Sweet, Earthy, Tropical',
    'Afrikaner': 'Sweet, Spicy, Earthy',
    'Allen-Iverson-Og': 'Sweet, Earthy, Pungent',
    'Alpha-Cow': 'Sweet, Berry, Earthy',
    'Als-Dream': 'Citrusy, Earthy, Pungent',
    'Amethyst': 'Berry, Sweet, Earthy',
    'Amnesia-Ganja-Haze': 'Sweet, Citrusy, Lemon',
    'Arabian-Gold': 'Earthy, Spicy',
    'Bad-Azz-Kush': 'Sweet, Earthy, Pine',
    'Batgirl': 'Sweet, Berry, Chemical',
    'Bcn-Diesel': 'Sour, Lemon, Earthy',
    'Bermuda-Sour': 'Diesel, Sweet, Citrusy',
    'Berts-Cookie-Dough': 'Sweet, Earthy, Vanilla',
    'Birds-Eye': 'Spicy, Sweet',
    'Black-Larry-Bird': 'Sweet, Lemon, Earthy',
    'Black-Roses': 'Earthy, Floral, Fruity',
    'Blizzard-Bush': 'Sweet, Diesel, Peppery',
    'Blue-Trane': 'Fruity, Sweet, Earthy',
    'Blueberry-Triple-Og': 'Earthy, Fruity',
    'Blukashima': 'Sweet, Berry, Spicy',
    'Bodega-Bubblegum': 'Sweet, Fruity, Earthy',
    'Boombaye': 'Earthy, Sweet, Floral',
    'Bright-Moments': 'Grape, Sweet, Floral',
    'Broke-Diesel': 'Sweet, Sour, Diesel',
    'Bronze-Whaler': 'Sweet, Citrus, Earthy',
    'Bubba-Berry': 'Sweet, Fruity, Spicy',
    'Bubba-Doja': 'Woody, Earthy',
    'Bubba-Hash': 'Spicy, Earthy, Sweet',
    'Burnt-Cookies': 'Lemon, Sour, Citrus, Fruity',
    'C3': 'Sweet, Spicy, Lemon',
    'Cackleberry': 'Berry, Sweet, Spicy, Fruity',
    'Calyx-Og': 'Earthy, Citrus, Sour',
    'Caramel-Kona-Coffee-Cookies': 'Sweet, Spicy, Coffee',
    'Cbd-Rich-Blessing': 'Earthy, Citrus, Sour',
    'Chem-Jong-Ill': 'Floral, Lemon, Chemical',
    'Cherry-Fire': 'Earthy, Sweet, Fruity',
    'Chocoholic': 'Berry, Sweet, Fruity',
    'Chocolate-Meringue': 'Lemon, Sweet, Earthy',
    'Chocolate-Tonic': 'Sweet, Herbal, Earthy',
    'Confidential-Wreck': 'Citrus, Earthy, Lemon',
    'Cronuts': 'Earthy, Woody, Sweet',
    'Diamond-Socks': 'Earthy, Fruity, Sweet',
    'Dj-Andy-Williams': 'Citrus, Vanilla, Herbal',
    'Do-Over-Og': 'Sweet, Pine, Spicy',
    'Domino': 'Spicy, Peppery',
    'Double-Tap': 'Berry, Grape, Sweet',
    'Dougs-Varin': 'Citrus, Spicy, Woody',
    'Dr-Bubbles': 'Floral, Fruity, Earthy, Sweet',
    'Dr-Quantum': 'Citrus, Earthy, SOur',
    'Erdpurt': 'Earthy, Berry, SPicy',
    'Ether': 'Chemical, Sweet, Diesel',
    'Fighting-Buddha': 'Spicy, Berry, Sweet',
    'First-48': 'Citrus, Fresh, Sour',
    'Frosty-Jesus': 'Sweet, Sour, Lemon',
    'G-Force': 'Diesel, Sweet',
    'Godzilla-Blood': 'Berry, Sour, Spicy',
    'Golden-Soda': 'Citrus, Grape, Sweet',
    'Goldwing': 'Herbal, Sweet',
    'Grape-Dawg': 'Fruity, Earthy, Sweet',
    'Grimmdica': 'Earthy, Butter, Cheese',
    'Guido-Kush': 'Fruity, Berry, Sweet'
}

In [19]:
for strain, flavor in flavors_dict.items():
    weed.loc[weed['strain'] == strain, 'flavor'] = flavor

In [20]:
weed.head()

Unnamed: 0,strain,type,rating,effects,flavor,description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
2,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
3,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."
4,3-Bears-Og,indica,0.0,"Relaxed, Euphoric","Sweet, Earthy, Pungent",3 Bears OG by Mephisto Genetics is an autoflow...


In [21]:
weed.isnull().sum()

strain          0
type            0
rating          0
effects         0
flavor          0
description    11
dtype: int64

## Ratings Column

The ratings column didnt have any null values. However, I noticed that some of the values were 0 so I had to fill them in with the correct rating 

In [22]:
zero_rating_strains = weed[weed['rating'] == 0.0]

In [23]:
zero_rating_strains[['strain', 'rating']]

Unnamed: 0,strain,rating
4,3-Bears-Og,0.0
52,Afrikaner,0.0
164,Bangi-Haze,0.0
185,Bermuda-Sour,0.0
259,Blizzard-Bush,0.0
319,Blue-Trane,0.0
358,Bodega-Bubblegum,0.0
363,Boombaye,0.0
364,Bootlegger,0.0
377,Bright-Moments,0.0


In [24]:
zero_rating = {
    '3-Bears-Og': 4.5,
    'Afrikaner': 4.4,
    'Bangi-Haze': 4.6,
    'Bermuda-Sour': 4.1,
    'Blizzard-Bush': 4.3,
    'Blue-Trane': 4.4,
    'Bodega-Bubblegum': 4.4,
    'Boombaye': 4.6,
    'Bootlegger': 4.5,
    'Bright-Moments': 4.7,
    'Bronze-Whaler': 4.1,
    'Bubba-Hash': 4.6,
    'Burnt-Cookies': 4.5,
    'Cackleberry': 4.7,
    'Calyx-Og': 4.4,
    'Caramel-Kona-Coffee-Cookies': 4.4,
    'Cbd-Rich-Blessing': 4.4,
    'Cherry-Fire': 4.7,
    'Chocoholic': 4.6,
    'Chocolate-Meringue': 4.6,
    'Chocolate-Tonic': 4.6,
    'Dr-Quantum': 4.4,
    'Erdpurt': 4.7,
    'Fn-Louzer': 4.4,
    'Godzilla-Blood': 4.3,
    'Golden-Soda': 4.5,
    'Grape-Dawg': 4.4,
    'Hannibal-Nectar': 4.4
}

## Adding the updated ratings back to the original dataframe

In [25]:
for strain, rating in zero_rating.items():
    weed.loc[weed['strain'] == strain, 'rating'] = rating

In [26]:
weed.head()

Unnamed: 0,strain,type,rating,effects,flavor,description
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus",$100 OG is a 50/50 hybrid strain that packs a ...
1,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody",1024 is a sativa-dominant hybrid bred in Spain...
2,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit",13 Dawgs is a hybrid of G13 and Chemdawg genet...
3,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange","Also known as Kosher Tangie, 24k Gold is a 60%..."
4,3-Bears-Og,indica,4.5,"Relaxed, Euphoric","Sweet, Earthy, Pungent",3 Bears OG by Mephisto Genetics is an autoflow...


## Decided I didn't need the description column so I dropped it

In [27]:
weed = weed.drop(columns=['description'])

In [28]:
weed.head()

Unnamed: 0,strain,type,rating,effects,flavor
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Earthy,Sweet,Citrus"
1,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy/Herbal,Sage,Woody"
2,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Apricot,Citrus,Grapefruit"
3,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Earthy,Orange"
4,3-Bears-Og,indica,4.5,"Relaxed, Euphoric","Sweet, Earthy, Pungent"


In [29]:
weed.shape

(1000, 5)

## Removing Duplicates 

In [30]:
def has_duplicate_flavors(flavor_str):
    if pd.isna(flavor_str):
        return False
    flavors = flavor_str.split(',')
    return len(flavors) != len(set(flavors))

In [31]:
duplicate_flavors_rows = weed[weed['flavor'].apply(has_duplicate_flavors)]

In [32]:
duplicate_flavors_rows

Unnamed: 0,strain,type,rating,effects,flavor
275,Blue-Cheese,indica,4.3,"Relaxed,Happy,Euphoric,Sleepy,Hungry","Blue,Cheese,Cheese,Blueberry"
394,Bubble-Cheese,indica,3.8,"Relaxed,Hungry,Sleepy,Tingly,Talkative","Blue,Cheese,Cheese,Pungent"
716,Durban-Cheese,hybrid,4.2,"Hungry,Euphoric,Uplifted,Giggly,Relaxed","Blue,Cheese,Cheese,Earthy"


In [33]:
def remove_duplicate_flavors(flavor_str):
    if pd.isna(flavor_str):
        return flavor_str
    flavors = flavor_str.split(',')
    unique_flavors = set(flavors)
    return ','.join(unique_flavors)

In [34]:
weed['flavor'] = weed['flavor'].apply(remove_duplicate_flavors)

In [35]:
weed[weed['flavor'].str.contains('Cheese', case=False, na=False)]

Unnamed: 0,strain,type,rating,effects,flavor
22,Aberdeen,hybrid,4.3,"Uplifted,Euphoric,Giggly,Energetic,Sleepy","Cheese,Blue,Menthol,Chemical"
65,Alchemy,hybrid,4.8,"Relaxed,Sleepy,Uplifted,Hungry,Tingly","Cheese,Spicy/Herbal,Sweet"
78,Alien-Reunion,hybrid,4.9,"Euphoric,Uplifted,Happy,Relaxed,Talkative","Pungent,Cheese,Skunk"
108,Ancient-Kush,indica,4.6,"Sleepy,Relaxed,Aroused,Creative,Energetic","Cheese,Mint,Skunk"
155,Balmoral,hybrid,4.2,"Relaxed,Happy,Uplifted,Focused,Talkative","Cheese,Pine,Earthy"
164,Bangi-Haze,sativa,4.6,"Aroused,Uplifted,Euphoric,Hungry","Fruit,Berry,Cheese,Tree"
180,Bedica,indica,4.3,"Relaxed,Sleepy,Hungry,Focused,Euphoric","Pungent,Cheese,Woody"
191,Bertberry-Cheesecake,sativa,5.0,"Uplifted,Creative,Euphoric,Focused,Energetic","Fruit,Berry,Cheese,Tree"
193,Bettie-Page,hybrid,4.2,"Creative,Uplifted,Energetic,Euphoric,Relaxed","Blue,Cheese,Sweet,Earthy"
199,Big-Budda-Cheese,hybrid,4.2,"Happy,Relaxed,Euphoric,Uplifted,Giggly","Cheese,Flowery,Earthy"


In [36]:
def has_duplicate_effects(effects_str):
    if pd.isna(effects_str):
        return False
    effects = effects_str.split(',')
    return len(effects) != len(set(effects))

In [37]:
duplicate_effects_rows = weed[weed['effects'].apply(has_duplicate_effects)]

In [38]:
duplicate_effects_rows

Unnamed: 0,strain,type,rating,effects,flavor


## Updating the flavors that had word flaws

In [39]:
flavor_updates = {
    "Alice-In-Wonderland": "Pine,Minty,Sweet",
    "Ancient-Kush": "Skunk,Cheese,Minty",
    "Arjans-Ultra-Haze-1": "Minty,Earthy",
    "Banana-Cream-Og": "Minty,Pine",
    "Bubblegun": "Pepper,Minty,Blueberry",
    "Chemdawg-4-X-Alien-Og-Kush": "Nutty,Minty,Flowery",
    "Chicle": "Ammonia,Diesel,Minty",
    "Double-Mint": "Minty, Lemon",
    "Dutch-Dragon": "Citrus,Sweet,Minty",
    "Goldberry": "Minty,Earthy,Woody",
    "Guava-Gelato": "Minty,Grapefruit,Tropical",
    "Harlequin-Gdp": "Minty,Earthy",
    "Hashberry": "Berry,Sweet,Minty",
    "Banana-Candy": "Tree,Fruity,Earthy,Sweet",
    "Banana-Kush": "Sweet,Tropical,Tree,Fruity",
    "Bangi-Haze": "Berry,Tree,Fruity,Cheese",
    "Bc-Sweet-Tooth": "Sweet,Honey,Tree,Fruity",
    "Bertberry-Cheesecake": "Cheese,Berry,Tree,Fruity",
    "Blue-Diamond": "Sweet,Tree,Fruity,Pineapple",
    "Enemy-Of-The-State": "Spicy,Herbal,Tree,Fruity,Earthy",
    "Fred-Flipn-Stoned": "Spicy,Herbal,Sage,Tree,Fruit",
    "Aberdeen": "Menthol,Blueberry,Cheese,Chemical",
    "Bettie-Page": "Earthy,Sweet,Blue,Cheese",
    "Blue-Cheese": "Cheese,Blueberry",
    "Bubble-Cheese": "Blueberry,Cheese,Pungent",
    "Cat-Piss": "Earthy,Woody,Blueberry,Cheese",
    "Durban-Cheese": "Blueberry,Cheese,Earthy",
    "831-Og": "Spicy, Pepper, Citrus",
    "Als-Dream": "Citrus, Earthy, Pungent",
    "Amnesia-Ganja-Haze": "Sweet, Citrus, Lemon",
    "Bermuda-Sour": "Diesel, Sweet, Citrus",
    "Black-Roses": "Earthy, Flowery, Fruity",
    "Boombaye": "Earthy, Sweet, Flowery",
    "Bright-Moments": "Grape, Sweet, Flowery",
    "Chem-Jong-Ill": "Flowery, Lemon, Chemical",
    "Dr-Bubbles": "Flowery, Fruity, Earthy, Sweet",
    "Blizzard-Bush": "Sweet, Diesel, Pepper",
    "Domino": "Spicy, Pepper",
    "Dr-Quantum": "Citrus, Earthy, Sour",
    "1024": "Spicy,Herbal, Sage, Woody",
    "Ace-Killer-Og": "Earthy, Diesel, Spicy,Herbal",
    "Afghani-Cbd": "Flowery, Chemical, Spicy,Herbal",
    "Afpak": "Pine, Spicy,Herbal, Earthy",
    "African": "Spicy,Herbal, Pungent, Pepper",
    "Air-Jordan-Og": "Diesel, Pine, Spicy,Herbal",
    "Alaskan-Ice": "Sweet, Earthy, Spicy,Herbal",
    "Alchemy": "Cheese, Sweet, Spicy,Herbal",
    "Alien-Stardawg": "Diesel, Spicy,Herbal, Sage",
    "Alien-Technology": "Earthy, Spicy,Herbal, Woody",
    "Allkush": "Spicy,Herbal, Honey, Earthy",
    "Alpha-Express": "Diesel, Earthy, Spicy,Herbal",
    "Alpha-Og": "Spicy,Herbal, Sweet, Berry",
    "Americano": "Earthy, Spicy,Herbal, Tobacco",
    "Apollo-13": "Earthy, Spicy,Herbal, Sweet",
    "Argyle": "Spicy,Herbal, Pungent, Earthy",
    "Armagnac": "Spicy,Herbal, Sweet, Flowery",
    "Ash": "Pungent, Skunk, Spicy,Herbal",
    "Aspen-Og": "Blueberry, Spicy,Herbal, Earthy",
    "Atomic-Goat": "Spicy,Herbal, Nutty, Sage",
    "Aurora-Indica": "Earthy, Spicy,Herbal, Tropical",
    "Ayahuasca-Purple": "Spicy,Herbal, Pungent, Earthy",
    "B4": "Spicy,Herbal, Berry, Skunk",
    "Banana-Peel": "Spicy,Herbal, Tropical, Earthy",
    "Bianca": "Earthy, Woody, Spicy,Herbal",
    "Big-Band": "Sweet, Spicy,Herbal, Sage",
    "Big-Smooth": "Blueberry, Spicy,Herbal, Nutty",
    "Biker-Leblanc": "Spicy,Herbal, Sage, Earthy",
    "Biochem": "Spicy,Herbal, Earthy, Cheese",
    "Black-Dhalia": "Sweet, Earthy, Spicy,Herbal",
    "Blackberry-Diesel": "Diesel, Spicy,Herbal, Pungent",
    "Blackberry-Dream": "Spicy,Herbal, Sweet, Grapefruit",
    "Blackberry-Haze": "Berry, Sweet, Spicy,Herbal",
    "Blackberry-Rhino": "Berry, Sweet, Spicy,Herbal",
    "Bloodhound": "Earthy, Sage, Spicy,Herbal",
    "Blowfish": "Spicy,Herbal, Pepper, Sage",
    "Blucifer": "Blueberry, Spicy,Herbal, Berry",
    "Blue-Dynamite": "Berry, Spicy,Herbal, Pine",
    "Blue-Heron": "Woody, Spicy,Herbal, Pine",
    "Brazil-Amazonia": "Spicy,Herbal, Berry, Earthy",
    "Caramella": "Menthol, Spicy,Herbal, Sweet",
    "Cbd-Kush": "Earthy, Woody, Spicy,Herbal",
    "Cello-Sweet-Og": "Sweet, Spicy,Herbal, Pineapple",
    "Cherry-Sauce": "Spicy,Herbal, Sweet, Flowery",
    "Chloe": "Earthy, Spicy,Herbal, Sweet",
    "Chocolate-Thai": "Spicy,Herbal, Coffee, Woody",
    "Chocolate-Thunder": "Honey, Nutty, Spicy,Herbal",
    "Chronic": "Woody, Spicy,Herbal, Sweet",
    "Clockwork-Orange": "Spicy,Herbal, Sweet, Citrus",
    "Colombian-Gold": "Earthy, Pungent, Spicy,Herbal",
    "Conspiracy-Kush": "Spicy,Herbal, Berry, Earthy",
    "Corleone-Kush": "Earthy, Spicy,Herbal, Flowery",
    "Critical-Plus": "Woody, Spicy,Herbal, Pungent",
    "Crouching-Tiger-Hidden-Alien": "Pine, Spicy,Herbal, Earthy",
    "Dakini-Kush": "Earthy, Coffee, Spicy,Herbal",
    "Damnesia": "Spicy,Herbal, Pepper, Earthy",
    "Dance-World": "Earthy, Citrus, Spicy,Herbal",
    "Dark-Side-Of-The-Moon": "Berry, Earthy, Spicy,Herbal",
    "Dawgfather-Og": "Spicy,Herbal, Pepper, Pungent",
    "Devil-Fruit": "Spicy,Herbal, Pepper, Sweet",
    "Devils-Tit": "Spicy,Herbal, Pepper, Earthy",
    "Diablo": "Sweet, Earthy, Spicy,Herbal",
    "Diamond-Og": "Earthy, Spicy,Herbal, Pine",
    "Diamond-Valley-Kush": "Earthy, Spicy,Herbal, Pungent",
    "Digweed": "Earthy, Skunk, Spicy,Herbal",
    "Dirty-Old-Bastard": "Spicy,Herbal, Flowery",
    "Dizzy-Og": "Sweet, Pungent, Spicy,Herbal",
    "Dorit": "Ammonia, Spicy,Herbal, Minty",
    "Dragons-Breath": "Spicy,Herbal, Earthy, Skunk",
    "Dream-Beaver": "Pineapple, Woody, Spicy,Herbal",
    "Dream-Lotus": "Spicy,Herbal, Woody, Tea",
    "Dutch-Crunch": "Spicy,Herbal, Pepper, Citrus",
    "Early-Miss": "Pepper, Ammonia, Spicy,Herbal",
    "Earth-Og": "Earthy, Spicy,Herbal, Pine",
    "East-Coast-Alien": "Diesel, Earthy, Spicy,Herbal",
    "Easy-Bud": "Lemon, Skunk, Spicy,Herbal",
    "Easy-Peezy": "Lemon, Spicy,Herbal, Earthy",
    "Ebola-7": "Sweet, Spicy,Herbal, Pine",
    "El-Nino": "Earthy, Woody, Spicy,Herbal",
    "Enemy-Of-The-State": "Spicy,Herbal, Tree, Fruit, Earthy",
    "Faygo-Red-Pop": "Strawberry, Spicy,Herbal, Sage",
    "Fire-Alien-Kush": "Earthy, Woody, Spicy,Herbal",
    "Fire-Alien-Strawberry": "Spicy,Herbal, Strawberry, Earthy",
    "Flaming-Cookies": "Spicy,Herbal, Sweet, Diesel",
    "Four-Cups": "Earthy, Diesel, Spicy,Herbal",
    "Fred-Flipn-Stoned": "Spicy,Herbal, Sage, Tree, Fruit",
    "Full-Moon": "Spicy,Herbal, Citrus, Earthy",
    "Galactica-Og": "Spicy,Herbal, Earthy, Pungent",
    "Gigabud": "Sweet, Spicy,Herbal, Tropical",
    "Gobbstopper": "Spicy,Herbal, Sweet, Berry",
    "Godfather-Og": "Earthy, Pine, Spicy,Herbal",
    "Godfather-Purple-Kush": "Berry, Spicy,Herbal, Pungent",
    "Godzilla-Glue": "Tropical, Spicy,Herbal, Sweet",
    "Gog-Magog": "Earthy, Pungent, Spicy,Herbal",
    "Golden-Gage": "Pepper, Spicy,Herbal, Citrus",
    "Good-Medicine": "Earthy, Spicy,Herbal, Citrus",
    "Green-Crack-Extreme": "Sweet, Earthy, Spicy,Herbal",
    "Green-Door-Kush": "Spicy,Herbal, Citrus, Woody",
    "Green-Love-Potion": "Spicy,Herbal, Lavender, Sweet",
    "Grimace-Og": "Pungent, Diesel, Spicy,Herbal",
    "Harle-Tsu": "Earthy, Woody, Spicy,Herbal",
    "Harley-Storm": "Spicy,Herbal, Sweet, Tropical",
    "Hash-Plant": "Earthy, Spicy,Herbal, Diesel"
}


## Adding the updates back to the original dataframe

In [40]:
for strain, flavor in flavor_updates.items():
    weed.loc[weed['strain'] == strain, 'flavor'] = flavor

In [41]:
weed.head(20)

Unnamed: 0,strain,type,rating,effects,flavor
0,100-Og,hybrid,4.0,"Creative,Energetic,Tingly,Euphoric,Relaxed","Citrus,Sweet,Earthy"
1,1024,sativa,4.4,"Uplifted,Happy,Relaxed,Energetic,Creative","Spicy,Herbal, Sage, Woody"
2,13-Dawgs,hybrid,4.2,"Tingly,Creative,Hungry,Relaxed,Uplifted","Grapefruit,Citrus,Apricot"
3,24K-Gold,hybrid,4.6,"Happy,Relaxed,Euphoric,Uplifted,Talkative","Citrus,Orange,Earthy"
4,3-Bears-Og,indica,4.5,"Relaxed, Euphoric","Earthy,Sweet, Pungent"
5,3-Kings,hybrid,4.4,"Relaxed,Euphoric,Happy,Uplifted,Hungry","Pungent,Sweet,Earthy"
6,303-Og,indica,4.2,"Relaxed,Happy,Euphoric,Uplifted,Giggly","Pungent,Citrus,Earthy"
7,3D-Cbd,sativa,4.6,"Uplifted,Focused,Happy,Talkative,Relaxed","Flowery,Woody,Earthy"
8,3Rd-Coast-Panama-Chunk,hybrid,4.7,"Energetic,Happy,Talkative,Uplifted","Diesel,Pine"
9,3X-Crazy,indica,4.4,"Relaxed,Tingly,Happy,Euphoric,Uplifted","Sweet,Grape,Earthy"


## Final cleaned dataframe

In [42]:
weed.to_csv('clean_weed.csv', index=False)