## Imports

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Cleaned Metadata Exploration

In [76]:
# Read the cleaned mini-train metadata CSV into a DataFrame and preview
# its first three rows.
clean_meta_df = pd.read_csv(
    'Mush_Me/data/metadata/02_cleaned_metadata/v1/metadata_mini_train.csv'
)
clean_meta_df.head(n=3)

Unnamed: 0.1,Unnamed: 0,level_0,index,date,year,month,day,countryCode,taxonID,scientificName,...,ImageUniqueID,Substrate,Latitude,Longitude,CoorUncert,Habitat,image_path,class_id,genus_id,family_id
0,0,0,0,2020-09-17,2020.0,9.0,17.0,DK,17215.0,Mycena crocata (Schrad.) P.Kumm.,...,2862684394-136762,dead wood (including bark),55.43381,10.200162,10.0,Unmanaged deciduous woodland,2862684394-136762.jpg,70,4,3
1,1,1,1,2017-08-16,2017.0,8.0,16.0,DK,10057.0,Agaricus augustus Fr.,...,2238502117-320693,soil,55.800171,12.478571,5.0,garden,2238502117-320693.jpg,1,0,0
2,2,2,2,2020-07-23,2020.0,7.0,23.0,DK,20027.0,Russula olivacea (Schaeff.) Fr.,...,2818074328-59948,soil,55.976322,12.293878,1.0,Mixed woodland (with coniferous and deciduous ...,2818074328-59948.jpg,151,5,4


In [77]:
# List every column name in the loaded metadata frame.
clean_meta_df.columns

Index(['Unnamed: 0', 'level_0', 'index', 'date', 'year', 'month', 'day',
       'countryCode', 'taxonID', 'scientificName', 'phylum', 'class', 'order',
       'family', 'genus', 'specificEpithet', 'species', 'ImageUniqueID',
       'Substrate', 'Latitude', 'Longitude', 'CoorUncert', 'Habitat',
       'image_path', 'class_id', 'genus_id', 'family_id'],
      dtype='object')

## Feature 'Substrate'

In [78]:
# Frequency of each raw 'Substrate' label, most common first.
# value_counts() omits NaN by default, so missing values are not counted here.
clean_meta_df['Substrate'].value_counts()

soil                                23039
dead wood (including bark)           2706
leaf or needle litter                2554
wood                                 1086
bark of living trees                  344
mosses                                326
stems of herbs, grass etc             300
bark                                  218
wood and roots of living trees        165
wood chips or mulch                   136
dead stems of herbs, grass etc         95
cones                                  79
other substrate                        44
peat mosses                            26
living stems of herbs, grass etc       24
fungi                                  18
faeces                                 16
fire spot                              12
fruits                                  9
catkins                                 8
living leaves                           3
building stone (e.g. bricks)            1
liverworts                              1
lichens                                 1

In [79]:
# Collapse fine-grained 'Substrate' labels into broader groups.
#
# Only labels that are actually renamed/merged are listed. Any label that is
# not a key here (including labels that were already regrouped) is kept as-is.
# The previous `Series.map(dict)` call turned every label missing from the
# dict into NaN, so running the cell a second time wiped out 'dead wood',
# 'leaves' and 'stems of herbs, grass'. With the fallback below the cell can
# be re-run safely and unexpected categories are preserved instead of lost.
SUBSTRATE_GROUPS = {
    'dead wood (including bark)': 'dead wood',
    'leaf or needle litter': 'leaves',
    'living leaves': 'leaves',
    'bark of living trees': 'bark',
    'stems of herbs, grass etc': 'stems of herbs, grass',
    'dead stems of herbs, grass etc': 'stems of herbs, grass',
    'living stems of herbs, grass etc': 'stems of herbs, grass',
}

clean_meta_df['Substrate'] = clean_meta_df['Substrate'].map(
    # dict.get with the label itself as default leaves unmapped values
    # (and NaN) untouched.
    lambda label: SUBSTRATE_GROUPS.get(label, label)
)

In [80]:
# Re-check the 'Substrate' label frequencies after the regrouping cell.
# value_counts() omits NaN by default, so missing values are not counted here.
clean_meta_df['Substrate'].value_counts()

soil                              23039
dead wood                          2706
leaves                             2557
wood                               1086
bark                                562
stems of herbs, grass               419
mosses                              326
wood and roots of living trees      165
wood chips or mulch                 136
cones                                79
other substrate                      44
peat mosses                          26
fungi                                18
faeces                               16
fire spot                            12
fruits                                9
catkins                               8
building stone (e.g. bricks)          1
liverworts                            1
lichens                               1
Name: Substrate, dtype: int64

## Feature 'Habitat'

In [81]:
# Frequency of each raw 'Habitat' label, most common first.
# value_counts() omits NaN by default, so missing values are not counted here.
clean_meta_df['Habitat'].value_counts()

Deciduous woodland                                      7953
Mixed woodland (with coniferous and deciduous trees)    5372
coniferous woodland/plantation                          4419
Unmanaged deciduous woodland                            3328
park/churchyard                                         2281
Unmanaged coniferous woodland                           1085
natural grassland                                       1063
lawn                                                     890
roadside                                                 779
Thorny scrubland                                         689
garden                                                   651
Bog woodland                                             349
Forest bog                                               330
hedgerow                                                 264
wooded meadow, grazing forest                            244
dune                                                     243
heath                                                    205

In [82]:
# Normalise and merge 'Habitat' labels into broader, lower-cased groups.
#
# Only labels that are actually renamed/merged are listed. Any label that is
# not a key here (including labels that were already regrouped) is kept as-is.
# The previous `Series.map(dict)` call turned every label missing from the
# dict into NaN, so a habitat absent from the dict was silently dropped and
# running the cell a second time erased regrouped labels such as 'wetland'
# and 'grassland'. With the fallback below the cell can be re-run safely.
HABITAT_GROUPS = {
    # woodland
    'Deciduous woodland': 'deciduous woodland',
    'Unmanaged deciduous woodland': 'deciduous woodland',
    'coniferous woodland/plantation': 'coniferous woodland',
    'Unmanaged coniferous woodland': 'coniferous woodland',
    'Mixed woodland (with coniferous and deciduous trees)': 'mixed woodland (coniferous and deciduous trees)',
    'Acidic oak woodland': 'acidic oak woodland',
    'wooded meadow, grazing forest': 'wooded meadow',
    # scrubland
    'Thorny scrubland': 'thorny scrubland',
    'Willow scrubland': 'willow scrubland',
    # wetland
    'Bog woodland': 'wetland',
    'Forest bog': 'wetland',
    'bog': 'wetland',
    # grassland and open land
    'natural grassland': 'grassland',
    'improved grassland': 'grassland',
    'heath': 'uncultivated land',
    'fallow field': 'fallow land',
    'fertilized field in rotation': 'cultivated land',
    # other
    'park/churchyard': 'park',
    'gravel or clay pit': 'gravel',
}

clean_meta_df['Habitat'] = clean_meta_df['Habitat'].map(
    # dict.get with the label itself as default leaves unmapped values
    # (and NaN) untouched.
    lambda label: HABITAT_GROUPS.get(label, label)
)

In [83]:
# Re-check the 'Habitat' label frequencies after the regrouping cell.
# value_counts() omits NaN by default, so missing values are not counted here.
clean_meta_df['Habitat'].value_counts()

deciduous woodland                                 11281
coniferous woodland                                 5504
mixed woodland (coniferous and deciduous trees)     5372
park                                                2281
grassland                                           1111
lawn                                                 890
wetland                                              866
roadside                                             779
thorny scrubland                                     689
garden                                               651
hedgerow                                             264
wooded meadow                                        244
dune                                                 243
uncultivated land                                    205
willow scrubland                                     201
salt meadow                                          148
acidic oak woodland                                  135
other habitat                  