In [1]:
import pandas as pd
import numpy as np
from pygbif import species

# 21: Araneae_and_Opiliones.csv : Arachnida

In [4]:
# Read file into dataframe

df = pd.read_csv('Araneae_and_Opiliones.csv')
df.head()

Unnamed: 0,Species,BE,Bestand,Trend_lang,Trend_kurz,RF,BB,D,GfU,Vorzugs_habitat,Ökolog_Typ
0,"Dysdera erythrina (WALCKENAER, 1802)",0,ex,,,,*,*,"7, 8, 12",7,(h)w
1,"Oonops domesticus DALMAS, 1916",0,ex,,,,*,D,"1a, 2a",16,"syn, trog"
2,"Eresus kollari ROSSI, 1846",0,ex,,,,2,2,"3, 7a, 12a",11,"x, th"
3,"Dipoena coracina (C. L. KOCH, 1837)",0,ex,,,,*,G,"7a, 11b",12,"x, th"
4,"Theonoe minutissima (O. P.-CAMBRIDGE, 1879)",0,ex,,,,2,3,"2d, 3, 11c",2,h


In [5]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Bestand",
                   "Trend_lang",
                   "Trend_kurz",
                   "RF",
                   "BB",
                   "D",
                   "GfU", 
                   "Vorzugs_habitat",
                   "Ökolog_Typ"
                  ], inplace = True)
df

Unnamed: 0,Species,BE
0,"Dysdera erythrina (WALCKENAER, 1802)",0
1,"Oonops domesticus DALMAS, 1916",0
2,"Eresus kollari ROSSI, 1846",0
3,"Dipoena coracina (C. L. KOCH, 1837)",0
4,"Theonoe minutissima (O. P.-CAMBRIDGE, 1879)",0
5,"Agyneta subtilis (O. P.-CAMBRIDGE, 1863)",0
6,"Centromerus capucinus (SIMON, 1884)",0
7,"Centromerus sellarius (SIMON, 1884)",0
8,"Dismodicus elevatus (C. L. KOCH, 1838)",0
9,"Glyphesis cottonae (LA Touche, 1945)",0


In [9]:
# Editing species column based on binomial nomenclature

df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
df.Species = df1[0].str.cat(df1[1], sep = " ")
df.head(10)

Unnamed: 0,Species,BE
0,Dysdera erythrina,0
1,Oonops domesticus,0
2,Eresus kollari,0
3,Dipoena coracina,0
4,Theonoe minutissima,0
5,Agyneta subtilis,0
6,Centromerus capucinus,0
7,Centromerus sellarius,0
8,Dismodicus elevatus,0
9,Glyphesis cottonae,0


In [10]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

Complete


In [11]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

Unnamed: 0,Species,BE,Family
0,Dysdera erythrina,0,Dysderidae
1,Oonops domesticus,0,Oonopidae
2,Eresus kollari,0,Eresidae
3,Dipoena coracina,0,Theridiidae
4,Theonoe minutissima,0,Theridiidae
5,Agyneta subtilis,0,Linyphiidae
6,Centromerus capucinus,0,Linyphiidae
7,Centromerus sellarius,0,Linyphiidae
8,Dismodicus elevatus,0,Linyphiidae
9,Glyphesis cottonae,0,Linyphiidae


In [12]:
# Add common names
df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Rote Liste und Gesamtartenliste der Spinnen (Araneae) und Gesamtartenliste der Weberknechte (Opiliones) von Berlin"
# Adding Letzter_Nachweis column
df["Letzter_Nachweis"] = ""

In [13]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]

In [14]:
df

Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Dysdera erythrina,Dysderidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...
1,Oonops domesticus,Oonopidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...
2,Eresus kollari,Eresidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...
3,Dipoena coracina,Theridiidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...
4,Theonoe minutissima,Theridiidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...
5,Agyneta subtilis,Linyphiidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...
6,Centromerus capucinus,Linyphiidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...
7,Centromerus sellarius,Linyphiidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...
8,Dismodicus elevatus,Linyphiidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...
9,Glyphesis cottonae,Linyphiidae,0,,,Rote Liste und Gesamtartenliste der Spinnen (A...


In [15]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/21_Araneae_and_Opiliones.csv', 
          index=False)

# 22: Mollusca_Gastropoda_and_Bivalvia.csv : Mollusca

In [16]:
# Read file into dataframe

df = pd.read_csv('22_Mollusca_Gastropoda_and_Bivalvia.csv')
df.head(10)

Unnamed: 0,Species,BE,Bestand,Trend_lang,Trend_kurz,RF,BB,D,GS,GfU,Vorzugs_habitat,Letzter_Nachweis
0,"Gyraulus rossmaessleri (AUERSWALD, 1852)",0,ex,,,,1,1,,14a,"SG, SK",vor 1980
1,"Myxas glutinosa (O. F. MÜLLER, 1774)",0,ex,,,,0,1,,14a,"FF, SG",1954
2,"Chondrula tridens tridens (O. F. MÜLLER, 1774)",0,ex,,,,3,1,,9a,"W, GF",vor 1950
3,"Helicigona lapicida (LINNAEUS, 1758)",0,ex,,,,3,*,,9a,"WB, WC, WE",1886
4,"Platyla polita (W. HARTMANN , 1840)",0,ex,,,,0,3,,"8, 9","WB, WE, WC",1933
5,"Vertigo geyeri LINDHOLM, 1925",0,ex,,,,0,1,II,14a,"SR, ME, GF",vor 1950
6,"Pisidium pulchellum JENYNS, 1832",0,ex,,,,1,1,,14a,"FF, FB, FG",vor 1950
7,"Unio crassus PHILIPSSON, 1788*",0,ex,,,,1,1,"§§, II","14a, 12c",FB,vor 1993


In [17]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Bestand",
                   "Trend_lang",
                   "Trend_kurz",
                   "RF",
                   "BB",
                   "D",
                   "GS",
                   "GfU", 
                   "Vorzugs_habitat"
                  ], inplace = True)
df.head()

Unnamed: 0,Species,BE,Letzter_Nachweis
0,"Gyraulus rossmaessleri (AUERSWALD, 1852)",0,vor 1980
1,"Myxas glutinosa (O. F. MÜLLER, 1774)",0,1954
2,"Chondrula tridens tridens (O. F. MÜLLER, 1774)",0,vor 1950
3,"Helicigona lapicida (LINNAEUS, 1758)",0,1886
4,"Platyla polita (W. HARTMANN , 1840)",0,1933


In [18]:
# Editing species column based on binomial nomenclature

df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
df.Species = df1[0].str.cat(df1[1], sep = " ")
df.head(10)

Unnamed: 0,Species,BE,Letzter_Nachweis
0,Gyraulus rossmaessleri,0,vor 1980
1,Myxas glutinosa,0,1954
2,Chondrula tridens,0,vor 1950
3,Helicigona lapicida,0,1886
4,Platyla polita,0,1933
5,Vertigo geyeri,0,vor 1950
6,Pisidium pulchellum,0,vor 1950
7,Unio crassus,0,vor 1993


In [19]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

Complete


In [20]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

Unnamed: 0,Species,BE,Letzter_Nachweis,Family
0,Gyraulus rossmaessleri,0,vor 1980,Planorbidae
1,Myxas glutinosa,0,1954,Lymnaeidae
2,Chondrula tridens,0,vor 1950,Enidae
3,Helicigona lapicida,0,1886,Helicidae
4,Platyla polita,0,1933,Aciculidae
5,Vertigo geyeri,0,vor 1950,Vertiginidae
6,Pisidium pulchellum,0,vor 1950,Sphaeriidae
7,Unio crassus,0,vor 1993,Unionidae


In [21]:
# Add common names
df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Rote Liste und Gesamtartenliste der Weichtiere (Mollusca: Gastropoda und Bivalvia) von Berlin"
# Adding Letzter_Nachweis column
#df["Letzter_Nachweis"] = ""
df.head()

Unnamed: 0,Species,BE,Letzter_Nachweis,Family,Deutscher_Name,Reference
0,Gyraulus rossmaessleri,0,vor 1980,Planorbidae,,Rote Liste und Gesamtartenliste der Weichtiere...
1,Myxas glutinosa,0,1954,Lymnaeidae,,Rote Liste und Gesamtartenliste der Weichtiere...
2,Chondrula tridens,0,vor 1950,Enidae,,Rote Liste und Gesamtartenliste der Weichtiere...
3,Helicigona lapicida,0,1886,Helicidae,,Rote Liste und Gesamtartenliste der Weichtiere...
4,Platyla polita,0,1933,Aciculidae,,Rote Liste und Gesamtartenliste der Weichtiere...


In [22]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Gyraulus rossmaessleri,Planorbidae,0,vor 1980,,Rote Liste und Gesamtartenliste der Weichtiere...
1,Myxas glutinosa,Lymnaeidae,0,1954,,Rote Liste und Gesamtartenliste der Weichtiere...
2,Chondrula tridens,Enidae,0,vor 1950,,Rote Liste und Gesamtartenliste der Weichtiere...
3,Helicigona lapicida,Helicidae,0,1886,,Rote Liste und Gesamtartenliste der Weichtiere...
4,Platyla polita,Aciculidae,0,1933,,Rote Liste und Gesamtartenliste der Weichtiere...
5,Vertigo geyeri,Vertiginidae,0,vor 1950,,Rote Liste und Gesamtartenliste der Weichtiere...
6,Pisidium pulchellum,Sphaeriidae,0,vor 1950,,Rote Liste und Gesamtartenliste der Weichtiere...
7,Unio crassus,Unionidae,0,vor 1993,,Rote Liste und Gesamtartenliste der Weichtiere...


In [23]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/22_Mollusca_Gastropoda_and_Bivalvia.csv', 
          index=False)

# 23: Pisces.csv : Pisces

In [24]:
# Read file into dataframe

df = pd.read_csv('Pisces.csv')
df.head(10)

Unnamed: 0,Common_name,Species,Redlist_category,Bestand,Trend_lang,Trend_kurz,Riskio
0,Bachneunauge,Lampetra planeri,0,ex,,,
1,Barbe,Barbus barbus,0,ex,,,
2,Europäischer\tStör,Acipenser sturio,0,ex,,,
3,Flussneunauge,Lampetra fluviatilis,0,ex,,,
4,Lachs,Salmo salar,0,ex,,,
5,Meerneunauge,Petromyzon marinus,0,ex,,,
6,Zährte,Vimba vimba,0,ex,,,


In [25]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Bestand",
                   "Trend_lang",
                   "Trend_kurz",
                   "Riskio"
                  ], inplace = True)
df.head()

Unnamed: 0,Common_name,Species,Redlist_category
0,Bachneunauge,Lampetra planeri,0
1,Barbe,Barbus barbus,0
2,Europäischer\tStör,Acipenser sturio,0
3,Flussneunauge,Lampetra fluviatilis,0
4,Lachs,Salmo salar,0


In [27]:
# Editing species column based on binomial nomenclature

#df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
#df.Species = df1[0].str.cat(df1[1], sep = " ")
#df.head(10)

In [28]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

Complete


In [29]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

Unnamed: 0,Common_name,Species,Redlist_category,Family
0,Bachneunauge,Lampetra planeri,0,Petromyzontidae
1,Barbe,Barbus barbus,0,Cyprinidae
2,Europäischer\tStör,Acipenser sturio,0,Acipenseridae
3,Flussneunauge,Lampetra fluviatilis,0,Petromyzontidae
4,Lachs,Salmo salar,0,Salmonidae
5,Meerneunauge,Petromyzon marinus,0,Petromyzontidae
6,Zährte,Vimba vimba,0,Cyprinidae


In [30]:
# renaming columns
df.rename({
    "Common_name": "Deutscher_Name",
    "Redlist_category": "BE"
}, 
    axis='columns',
    inplace = True
)
df

Unnamed: 0,Deutscher_Name,Species,BE,Family
0,Bachneunauge,Lampetra planeri,0,Petromyzontidae
1,Barbe,Barbus barbus,0,Cyprinidae
2,Europäischer\tStör,Acipenser sturio,0,Acipenseridae
3,Flussneunauge,Lampetra fluviatilis,0,Petromyzontidae
4,Lachs,Salmo salar,0,Salmonidae
5,Meerneunauge,Petromyzon marinus,0,Petromyzontidae
6,Zährte,Vimba vimba,0,Cyprinidae


In [31]:
# Add common names
#df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Gesamtartenliste und Rote Liste der Fische und Neunaugen (Pisces et Cyclostomata) von Berlin"
# Adding Letzter_Nachweis column
df["Letzter_Nachweis"] = ""
df.head()

Unnamed: 0,Deutscher_Name,Species,BE,Family,Reference,Letzter_Nachweis
0,Bachneunauge,Lampetra planeri,0,Petromyzontidae,Gesamtartenliste und Rote Liste der Fische und...,
1,Barbe,Barbus barbus,0,Cyprinidae,Gesamtartenliste und Rote Liste der Fische und...,
2,Europäischer\tStör,Acipenser sturio,0,Acipenseridae,Gesamtartenliste und Rote Liste der Fische und...,
3,Flussneunauge,Lampetra fluviatilis,0,Petromyzontidae,Gesamtartenliste und Rote Liste der Fische und...,
4,Lachs,Salmo salar,0,Salmonidae,Gesamtartenliste und Rote Liste der Fische und...,


In [32]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Lampetra planeri,Petromyzontidae,0,,Bachneunauge,Gesamtartenliste und Rote Liste der Fische und...
1,Barbus barbus,Cyprinidae,0,,Barbe,Gesamtartenliste und Rote Liste der Fische und...
2,Acipenser sturio,Acipenseridae,0,,Europäischer\tStör,Gesamtartenliste und Rote Liste der Fische und...
3,Lampetra fluviatilis,Petromyzontidae,0,,Flussneunauge,Gesamtartenliste und Rote Liste der Fische und...
4,Salmo salar,Salmonidae,0,,Lachs,Gesamtartenliste und Rote Liste der Fische und...
5,Petromyzon marinus,Petromyzontidae,0,,Meerneunauge,Gesamtartenliste und Rote Liste der Fische und...
6,Vimba vimba,Cyprinidae,0,,Zährte,Gesamtartenliste und Rote Liste der Fische und...


In [33]:
# Replace the \t type space with normal space
df.at[2,"Deutscher_Name"]= "Europäischer Stör"
df

Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Lampetra planeri,Petromyzontidae,0,,Bachneunauge,Gesamtartenliste und Rote Liste der Fische und...
1,Barbus barbus,Cyprinidae,0,,Barbe,Gesamtartenliste und Rote Liste der Fische und...
2,Acipenser sturio,Acipenseridae,0,,Europäischer Stör,Gesamtartenliste und Rote Liste der Fische und...
3,Lampetra fluviatilis,Petromyzontidae,0,,Flussneunauge,Gesamtartenliste und Rote Liste der Fische und...
4,Salmo salar,Salmonidae,0,,Lachs,Gesamtartenliste und Rote Liste der Fische und...
5,Petromyzon marinus,Petromyzontidae,0,,Meerneunauge,Gesamtartenliste und Rote Liste der Fische und...
6,Vimba vimba,Cyprinidae,0,,Zährte,Gesamtartenliste und Rote Liste der Fische und...


In [34]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/23_Pisces.csv', 
          index=False)

# 24: Reptilia.csv : Reptilia

In [35]:
# Read file into dataframe

df = pd.read_csv('Reptilia.csv')
df.head(10)

Unnamed: 0,Species,BE,Bestand,Trend_lang,Trend_kurz,RF,BB,D,GS,GfU,Common_name
0,"Emys orbicularis (LINNAEUS, 1758)",0,ex,,,,1,1,"§§, II, IV","4a, 5a, 5b, 10c, 12b , 13a, 14b,",Europäische Sumpfschildkröte


In [36]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Bestand",
                   "Trend_lang",
                   "Trend_kurz",
                   "RF",
                   "BB",
                   "D",
                   "GS", 
                   "GfU"
                  ], inplace = True)
df.head()

Unnamed: 0,Species,BE,Common_name
0,"Emys orbicularis (LINNAEUS, 1758)",0,Europäische Sumpfschildkröte


In [37]:
# Editing species column based on binomial nomenclature

df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
df.Species = df1[0].str.cat(df1[1], sep = " ")
df.head(10)

Unnamed: 0,Species,BE,Common_name
0,Emys orbicularis,0,Europäische Sumpfschildkröte


In [38]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

Complete


In [39]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

Unnamed: 0,Species,BE,Common_name,Family
0,Emys orbicularis,0,Europäische Sumpfschildkröte,Emydidae


In [40]:
# renaming columns
df.rename({
    "Common_name": "Deutscher_Name"
}, 
    axis='columns',
    inplace = True
)
df

Unnamed: 0,Species,BE,Deutscher_Name,Family
0,Emys orbicularis,0,Europäische Sumpfschildkröte,Emydidae


In [41]:
# Add common names
#df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Rote Liste und Gesamtartenliste der Kriechtiere (Reptilia) von Berlin"
# Adding Letzter_Nachweis column
df["Letzter_Nachweis"] = ""
df.head()

Unnamed: 0,Species,BE,Deutscher_Name,Family,Reference,Letzter_Nachweis
0,Emys orbicularis,0,Europäische Sumpfschildkröte,Emydidae,Rote Liste und Gesamtartenliste der Kriechtier...,


In [42]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Emys orbicularis,Emydidae,0,,Europäische Sumpfschildkröte,Rote Liste und Gesamtartenliste der Kriechtier...


In [43]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/24_Reptilia.csv', 
          index=False)

# 25: Amphibia.csv : Amphibia

In [44]:
# Read file into dataframe

df = pd.read_csv('Amphibia.csv')
df.head(10)

Unnamed: 0,Species,BE,Bestand,Trend_lang,Trend_kurz,RF,BB,D,GS,GfU,Letzter_Nachweis,Common_name
0,"Hyla arborea (LINNAEUS, 1758)",0,ex,,,,2,3,"§§, IV","1a, 2a, 4a, 6a,10c, 11c,",vor 1960,Mitteleuropäischer
1,"Pelophylax lessonae (CAMERANAO, 1782)",0,ex,,,,G,G,"§, IV","2d, 10c, 11c, 12b",1991,Kleiner Wasserfrosch


In [45]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Bestand",
                   "Trend_lang",
                   "Trend_kurz",
                   "RF",
                   "BB",
                   "D",
                   "GS",
                   "GfU"
                  ], inplace = True)
df.head()

Unnamed: 0,Species,BE,Letzter_Nachweis,Common_name
0,"Hyla arborea (LINNAEUS, 1758)",0,vor 1960,Mitteleuropäischer
1,"Pelophylax lessonae (CAMERANAO, 1782)",0,1991,Kleiner Wasserfrosch


In [46]:
# Editing species column based on binomial nomenclature

df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
df.Species = df1[0].str.cat(df1[1], sep = " ")
df.head(10)

Unnamed: 0,Species,BE,Letzter_Nachweis,Common_name
0,Hyla arborea,0,vor 1960,Mitteleuropäischer
1,Pelophylax lessonae,0,1991,Kleiner Wasserfrosch


In [47]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

Complete


In [48]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

Unnamed: 0,Species,BE,Letzter_Nachweis,Common_name,Family
0,Hyla arborea,0,vor 1960,Mitteleuropäischer,Hylidae
1,Pelophylax lessonae,0,1991,Kleiner Wasserfrosch,Ranidae


In [49]:
# renaming columns
df.rename({
    "Common_name": "Deutscher_Name"
}, 
    axis='columns',
    inplace = True
)
df

Unnamed: 0,Species,BE,Letzter_Nachweis,Deutscher_Name,Family
0,Hyla arborea,0,vor 1960,Mitteleuropäischer,Hylidae
1,Pelophylax lessonae,0,1991,Kleiner Wasserfrosch,Ranidae


In [50]:
# Add common names
#df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Rote Liste und Gesamtartenliste der Lurche (Amphibia) von Berlin"
# Adding Letzter_Nachweis column
#df["Letzter_Nachweis"] = ""
df.head()

Unnamed: 0,Species,BE,Letzter_Nachweis,Deutscher_Name,Family,Reference
0,Hyla arborea,0,vor 1960,Mitteleuropäischer,Hylidae,Rote Liste und Gesamtartenliste der Lurche (Am...
1,Pelophylax lessonae,0,1991,Kleiner Wasserfrosch,Ranidae,Rote Liste und Gesamtartenliste der Lurche (Am...


In [51]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Hyla arborea,Hylidae,0,vor 1960,Mitteleuropäischer,Rote Liste und Gesamtartenliste der Lurche (Am...
1,Pelophylax lessonae,Ranidae,0,1991,Kleiner Wasserfrosch,Rote Liste und Gesamtartenliste der Lurche (Am...


In [52]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/25_Amphibia.csv', 
          index=False)

# 26: Aves.csv : Aves

In [95]:
# Read file into dataframe

df = pd.read_csv('Aves.csv')
df.head(10)

Unnamed: 0,Common_name,Species,Status,BP_Rev,Häufig_keitsklasse,Trend_lang,Trend_kurz,Risik,BE2013,BE2003,BB2008,D2007
0,Löffelente *,Anas clypeata,I ex,"0 – 1, 2012: 1 BP",ex,,,,0,1,2.0,3.0
1,Moorente,Aythya nyroca,I ex,erloschen,ex,,,,0,0,1.0,1.0
2,Gänsesäger,Mergus merganser,I ex,erloschen,ex,,,,0,0,2.0,2.0
3,Rebhuhn *,Perdix perdix,I ex,"erloschen, zuletzt: 1996",ex,,,,0,1,2.0,2.0
4,Birkhuhn,Tetrao terix,I ex,erloschen,ex,,,,0,0,1.0,2.0
5,Schwarzstorch,Ciconia nigra,I ex,erloschen,ex,,,,0,0,3.0,
6,Fischadler,Pandion haliaetus,I ex,erloschen,ex,,,,0,0,,3.0
7,Schreiadler,Aquila pomarina,I ex,erloschen,ex,,,,0,0,1.0,1.0
8,Kornweihe,Circus cyaneus,I ex,erloschen,ex,,,,0,0,0.0,2.0
9,Wiesenweihe,Circus pygargus,I ex,erloschen,ex,,,,0,0,2.0,2.0


In [96]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Häufig_keitsklasse",
                   "Trend_lang",
                   "Trend_kurz",
                   "Risik",
                   "BB2008",
                   "D2007"
                  ], inplace = True)
df.head()

Unnamed: 0,Common_name,Species,Status,BP_Rev,BE2013,BE2003
0,Löffelente *,Anas clypeata,I ex,"0 – 1, 2012: 1 BP",0,1
1,Moorente,Aythya nyroca,I ex,erloschen,0,0
2,Gänsesäger,Mergus merganser,I ex,erloschen,0,0
3,Rebhuhn *,Perdix perdix,I ex,"erloschen, zuletzt: 1996",0,1
4,Birkhuhn,Tetrao terix,I ex,erloschen,0,0


In [97]:
# Remove the stars from Common_name

df1 = df.Common_name.str.split(pat="*", n = 1, expand = True)
df.Common_name = df1[0]
#df.head(10)
df

Unnamed: 0,Common_name,Species,Status,BP_Rev,BE2013,BE2003
0,Löffelente,Anas clypeata,I ex,"0 – 1, 2012: 1 BP",0,1
1,Moorente,Aythya nyroca,I ex,erloschen,0,0
2,Gänsesäger,Mergus merganser,I ex,erloschen,0,0
3,Rebhuhn,Perdix perdix,I ex,"erloschen, zuletzt: 1996",0,1
4,Birkhuhn,Tetrao terix,I ex,erloschen,0,0
5,Schwarzstorch,Ciconia nigra,I ex,erloschen,0,0
6,Fischadler,Pandion haliaetus,I ex,erloschen,0,0
7,Schreiadler,Aquila pomarina,I ex,erloschen,0,0
8,Kornweihe,Circus cyaneus,I ex,erloschen,0,0
9,Wiesenweihe,Circus pygargus,I ex,erloschen,0,0


In [98]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

Complete


In [99]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

Unnamed: 0,Common_name,Species,Status,BP_Rev,BE2013,BE2003,Family
0,Löffelente,Anas clypeata,I ex,"0 – 1, 2012: 1 BP",0,1,Anatidae
1,Moorente,Aythya nyroca,I ex,erloschen,0,0,Anatidae
2,Gänsesäger,Mergus merganser,I ex,erloschen,0,0,Anatidae
3,Rebhuhn,Perdix perdix,I ex,"erloschen, zuletzt: 1996",0,1,Phasianidae
4,Birkhuhn,Tetrao terix,I ex,erloschen,0,0,Phasianidae
5,Schwarzstorch,Ciconia nigra,I ex,erloschen,0,0,Ciconiidae
6,Fischadler,Pandion haliaetus,I ex,erloschen,0,0,Pandionidae
7,Schreiadler,Aquila pomarina,I ex,erloschen,0,0,Accipitridae
8,Kornweihe,Circus cyaneus,I ex,erloschen,0,0,Accipitridae
9,Wiesenweihe,Circus pygargus,I ex,erloschen,0,0,Accipitridae


In [100]:
# renaming columns
df.rename({
    "Common_name": "Deutscher_Name",
    "Status": "BE"
}, 
    axis='columns',
    inplace = True
)
df

Unnamed: 0,Deutscher_Name,Species,BE,BP_Rev,BE2013,BE2003,Family
0,Löffelente,Anas clypeata,I ex,"0 – 1, 2012: 1 BP",0,1,Anatidae
1,Moorente,Aythya nyroca,I ex,erloschen,0,0,Anatidae
2,Gänsesäger,Mergus merganser,I ex,erloschen,0,0,Anatidae
3,Rebhuhn,Perdix perdix,I ex,"erloschen, zuletzt: 1996",0,1,Phasianidae
4,Birkhuhn,Tetrao terix,I ex,erloschen,0,0,Phasianidae
5,Schwarzstorch,Ciconia nigra,I ex,erloschen,0,0,Ciconiidae
6,Fischadler,Pandion haliaetus,I ex,erloschen,0,0,Pandionidae
7,Schreiadler,Aquila pomarina,I ex,erloschen,0,0,Accipitridae
8,Kornweihe,Circus cyaneus,I ex,erloschen,0,0,Accipitridae
9,Wiesenweihe,Circus pygargus,I ex,erloschen,0,0,Accipitridae


In [101]:
# Add common names
#df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Rote Liste und Liste der Brutvögel (Aves) von Berlin"
# Adding Letzter_Nachweis column
df["Letzter_Nachweis"] = ""
df.head()

Unnamed: 0,Deutscher_Name,Species,BE,BP_Rev,BE2013,BE2003,Family,Reference,Letzter_Nachweis
0,Löffelente,Anas clypeata,I ex,"0 – 1, 2012: 1 BP",0,1,Anatidae,Rote Liste und Liste der Brutvögel (Aves) von ...,
1,Moorente,Aythya nyroca,I ex,erloschen,0,0,Anatidae,Rote Liste und Liste der Brutvögel (Aves) von ...,
2,Gänsesäger,Mergus merganser,I ex,erloschen,0,0,Anatidae,Rote Liste und Liste der Brutvögel (Aves) von ...,
3,Rebhuhn,Perdix perdix,I ex,"erloschen, zuletzt: 1996",0,1,Phasianidae,Rote Liste und Liste der Brutvögel (Aves) von ...,
4,Birkhuhn,Tetrao terix,I ex,erloschen,0,0,Phasianidae,Rote Liste und Liste der Brutvögel (Aves) von ...,


In [102]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Anas clypeata,Anatidae,I ex,,Löffelente,Rote Liste und Liste der Brutvögel (Aves) von ...
1,Aythya nyroca,Anatidae,I ex,,Moorente,Rote Liste und Liste der Brutvögel (Aves) von ...
2,Mergus merganser,Anatidae,I ex,,Gänsesäger,Rote Liste und Liste der Brutvögel (Aves) von ...
3,Perdix perdix,Phasianidae,I ex,,Rebhuhn,Rote Liste und Liste der Brutvögel (Aves) von ...
4,Tetrao terix,Phasianidae,I ex,,Birkhuhn,Rote Liste und Liste der Brutvögel (Aves) von ...
5,Ciconia nigra,Ciconiidae,I ex,,Schwarzstorch,Rote Liste und Liste der Brutvögel (Aves) von ...
6,Pandion haliaetus,Pandionidae,I ex,,Fischadler,Rote Liste und Liste der Brutvögel (Aves) von ...
7,Aquila pomarina,Accipitridae,I ex,,Schreiadler,Rote Liste und Liste der Brutvögel (Aves) von ...
8,Circus cyaneus,Accipitridae,I ex,,Kornweihe,Rote Liste und Liste der Brutvögel (Aves) von ...
9,Circus pygargus,Accipitridae,I ex,,Wiesenweihe,Rote Liste und Liste der Brutvögel (Aves) von ...


In [106]:
# Replace | ex with 0
df["BE"] = 0
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["BE"] = 0


Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Anas clypeata,Anatidae,0,,Löffelente,Rote Liste und Liste der Brutvögel (Aves) von ...
1,Aythya nyroca,Anatidae,0,,Moorente,Rote Liste und Liste der Brutvögel (Aves) von ...
2,Mergus merganser,Anatidae,0,,Gänsesäger,Rote Liste und Liste der Brutvögel (Aves) von ...
3,Perdix perdix,Phasianidae,0,,Rebhuhn,Rote Liste und Liste der Brutvögel (Aves) von ...
4,Tetrao terix,Phasianidae,0,,Birkhuhn,Rote Liste und Liste der Brutvögel (Aves) von ...
5,Ciconia nigra,Ciconiidae,0,,Schwarzstorch,Rote Liste und Liste der Brutvögel (Aves) von ...
6,Pandion haliaetus,Pandionidae,0,,Fischadler,Rote Liste und Liste der Brutvögel (Aves) von ...
7,Aquila pomarina,Accipitridae,0,,Schreiadler,Rote Liste und Liste der Brutvögel (Aves) von ...
8,Circus cyaneus,Accipitridae,0,,Kornweihe,Rote Liste und Liste der Brutvögel (Aves) von ...
9,Circus pygargus,Accipitridae,0,,Wiesenweihe,Rote Liste und Liste der Brutvögel (Aves) von ...


In [107]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/26_Aves.csv', 
          index=False)

# 27: Mammalia.csv : Mammalia

In [108]:
# Read file into dataframe

df = pd.read_csv('Mammalia.csv')
df.head(10)

Unnamed: 0,Common_name,Species,BE,BB,DE,GS,Vorzugshabitate,Gefährdungsursachen
0,Mopsfledermaus,Barbastella barbastellus \n(SCHREBER),0,1,1,"§§, II W, O",,"2c, 9, 14h"
1,Hamster,Cricetus cricetus (LINNAEUS),0,1,2,§§,L,"1a, 6"
2,Hausratte,Rattus rattus (LINNAEUS),0,2,-,,O,"2c, 4d"
3,Rothirsch*,Cervus elaphus LINNAEUS*,0,-,,,W,"4d, 14b"


In [109]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["BB",
                   "DE",
                   "GS",
                   "Vorzugshabitate",
                   "Gefährdungsursachen"
                  ], inplace = True)
df.head()

Unnamed: 0,Common_name,Species,BE
0,Mopsfledermaus,Barbastella barbastellus \n(SCHREBER),0
1,Hamster,Cricetus cricetus (LINNAEUS),0
2,Hausratte,Rattus rattus (LINNAEUS),0
3,Rothirsch*,Cervus elaphus LINNAEUS*,0


In [110]:
# Editing species column based on binomial nomenclature

df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
df.Species = df1[0].str.cat(df1[1], sep = " ")
df.head(10)

Unnamed: 0,Common_name,Species,BE
0,Mopsfledermaus,Barbastella barbastellus,0
1,Hamster,Cricetus cricetus,0
2,Hausratte,Rattus rattus,0
3,Rothirsch*,Cervus elaphus,0


In [111]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

Complete


In [112]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

Unnamed: 0,Common_name,Species,BE,Family
0,Mopsfledermaus,Barbastella barbastellus,0,Vespertilionidae
1,Hamster,Cricetus cricetus,0,Cricetidae
2,Hausratte,Rattus rattus,0,Muridae
3,Rothirsch*,Cervus elaphus,0,Cervidae


In [113]:
# renaming columns
df.rename({
    "Common_name": "Deutscher_Name"
}, 
    axis='columns',
    inplace = True
)
df

Unnamed: 0,Deutscher_Name,Species,BE,Family
0,Mopsfledermaus,Barbastella barbastellus,0,Vespertilionidae
1,Hamster,Cricetus cricetus,0,Cricetidae
2,Hausratte,Rattus rattus,0,Muridae
3,Rothirsch*,Cervus elaphus,0,Cervidae


In [114]:
# Add common names
#df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Rote Liste und Gesamtartenliste der Säugetiere (Mammalia) von Berlin"
# Adding Letzter_Nachweis column
df["Letzter_Nachweis"] = ""
df.head()

Unnamed: 0,Deutscher_Name,Species,BE,Family,Reference,Letzter_Nachweis
0,Mopsfledermaus,Barbastella barbastellus,0,Vespertilionidae,Rote Liste und Gesamtartenliste der Säugetiere...,
1,Hamster,Cricetus cricetus,0,Cricetidae,Rote Liste und Gesamtartenliste der Säugetiere...,
2,Hausratte,Rattus rattus,0,Muridae,Rote Liste und Gesamtartenliste der Säugetiere...,
3,Rothirsch*,Cervus elaphus,0,Cervidae,Rote Liste und Gesamtartenliste der Säugetiere...,


In [115]:
# Remove the stars from Deutscher_Name

df1 = df.Deutscher_Name.str.split(pat="*", n = 1, expand = True)
df.Deutscher_Name = df1[0]
#df.head(10)
df

Unnamed: 0,Deutscher_Name,Species,BE,Family,Reference,Letzter_Nachweis
0,Mopsfledermaus,Barbastella barbastellus,0,Vespertilionidae,Rote Liste und Gesamtartenliste der Säugetiere...,
1,Hamster,Cricetus cricetus,0,Cricetidae,Rote Liste und Gesamtartenliste der Säugetiere...,
2,Hausratte,Rattus rattus,0,Muridae,Rote Liste und Gesamtartenliste der Säugetiere...,
3,Rothirsch,Cervus elaphus,0,Cervidae,Rote Liste und Gesamtartenliste der Säugetiere...,


In [116]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Barbastella barbastellus,Vespertilionidae,0,,Mopsfledermaus,Rote Liste und Gesamtartenliste der Säugetiere...
1,Cricetus cricetus,Cricetidae,0,,Hamster,Rote Liste und Gesamtartenliste der Säugetiere...
2,Rattus rattus,Muridae,0,,Hausratte,Rote Liste und Gesamtartenliste der Säugetiere...
3,Cervus elaphus,Cervidae,0,,Rothirsch,Rote Liste und Gesamtartenliste der Säugetiere...


In [117]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/27_Mammalia.csv', 
          index=False)

# 28: Vascular_plants.csv : Vascular Plants

In [118]:
# Read file into dataframe

df = pd.read_csv('Vascular_plants.csv')
df.head(10)

Unnamed: 0,Species,BE,Bestand,Trend_lang,Trend_kurz,RF,BB,D,EU,GS,V,Zielart_2008,Zielart_2017,V.1,Letzter Nachweis,Common_name
0,Achillea salicifolia *,0,ex,,,,G,,,,,,!!,,1999,Weidenblatt-Schafgarbe
1,Agrostemma githago *,0,ex,,,,1,1.0,,,,,,,1989,Korn-Rade
2,Alchemilla plicata *,0,ex,,,,1,2.0,,,,,!!,,1990,Gefalteter Frauenmantel
3,Alchemilla vulgaris s. str.,0,ex,,,,1,,,,,,,,1969,Spitzlappiger Frauenmantel
4,Alisma gramineum,0,ex,,,,2,,3.0,,,,,,1950,Grasblättriger Froschlöffel
5,Allium lusitanicum,0,ex,,,,1,,,§,,,,,1859,Berg-Lauch
6,Alopecurus myosuroides *,0,ex,,,,,,,,,,,,1968,Acker-Fuchsschwanz
7,Althaea officinalis *,0,ex,,,,1,3.0,,§,,,,N,1859,Echter Eibisch
8,Ammophila arenaria *,0,ex,,,,,,,,,,,N,1859,Gemeiner Strandhafer
9,Anagallis minima,0,ex,,,,,3.0,,,,,,,1985,Acker-Kleinling


In [119]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Bestand",
                   "Trend_lang",
                   "Trend_kurz",
                   "RF",
                   "BB",
                   "D",
                   "EU",
                   "GS", 
                   "V",
                   "Zielart_2008",
                   "Zielart_2017",
                   "V.1"
                  ], inplace = True)
df.head()

Unnamed: 0,Species,BE,Letzter Nachweis,Common_name
0,Achillea salicifolia *,0,1999,Weidenblatt-Schafgarbe
1,Agrostemma githago *,0,1989,Korn-Rade
2,Alchemilla plicata *,0,1990,Gefalteter Frauenmantel
3,Alchemilla vulgaris s. str.,0,1969,Spitzlappiger Frauenmantel
4,Alisma gramineum,0,1950,Grasblättriger Froschlöffel


In [120]:
# Editing species column based on binomial nomenclature

df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
df.Species = df1[0].str.cat(df1[1], sep = " ")
df.head(10)

Unnamed: 0,Species,BE,Letzter Nachweis,Common_name
0,Achillea salicifolia,0,1999,Weidenblatt-Schafgarbe
1,Agrostemma githago,0,1989,Korn-Rade
2,Alchemilla plicata,0,1990,Gefalteter Frauenmantel
3,Alchemilla vulgaris,0,1969,Spitzlappiger Frauenmantel
4,Alisma gramineum,0,1950,Grasblättriger Froschlöffel
5,Allium lusitanicum,0,1859,Berg-Lauch
6,Alopecurus myosuroides,0,1968,Acker-Fuchsschwanz
7,Althaea officinalis,0,1859,Echter Eibisch
8,Ammophila arenaria,0,1859,Gemeiner Strandhafer
9,Anagallis minima,0,1985,Acker-Kleinling


In [130]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='plants')
print("Complete")

Complete


In [131]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

Unnamed: 0,Species,BE,Letzter Nachweis,Common_name,Family
0,Achillea salicifolia,0,1999,Weidenblatt-Schafgarbe,Asteraceae
1,Agrostemma githago,0,1989,Korn-Rade,Caryophyllaceae
2,Alchemilla plicata,0,1990,Gefalteter Frauenmantel,Rosaceae
3,Alchemilla vulgaris,0,1969,Spitzlappiger Frauenmantel,Rosaceae
4,Alisma gramineum,0,1950,Grasblättriger Froschlöffel,Alismataceae
...,...,...,...,...,...
261,Veronica opaca,0,1881,Glanzloser Ehrenpreis,
262,Veronica teucrium,0,1881,Großer Ehrenpreis,
263,Viola stagnina,0,1984,Gräben-Veilchen,
264,Vulpia bromoides,0,1992,Trespen-Federschwingel,


In [134]:
df.loc[df.Family.isna()]

Unnamed: 0,Species,BE,Letzter Nachweis,Common_name,Family
259,Valerianella dentata,0,1965,Gezähntes Rapünzchen,
260,Valerianella rimosa,0,1900,Gefurchtes Rapünzchen,
261,Veronica opaca,0,1881,Glanzloser Ehrenpreis,
262,Veronica teucrium,0,1881,Großer Ehrenpreis,
263,Viola stagnina,0,1984,Gräben-Veilchen,
264,Vulpia bromoides,0,1992,Trespen-Federschwingel,
265,Xanthium strumarium,0,1986,Gemeine Spitzklette,


In [135]:
missing = {}

for i in df.loc[df["Family"].isna()].Species:     
   missing[i] = species.name_backbone(name=str(i), kingdom='plants')
print("Complete")

Complete


In [136]:
gbif_data_missing = pd.DataFrame.from_dict(missing, orient = "index")
gbif_data_missing.reset_index(inplace = True)
gbif_data_missing.columns

Index(['index', 'usageKey', 'scientificName', 'canonicalName', 'rank',
       'status', 'confidence', 'matchType', 'kingdom', 'phylum', 'order',
       'family', 'genus', 'species', 'kingdomKey', 'phylumKey', 'classKey',
       'orderKey', 'familyKey', 'genusKey', 'speciesKey', 'synonym', 'class',
       'acceptedUsageKey'],
      dtype='object')

In [137]:
gbif_data_missing["family"]

0    Caprifoliaceae
1    Caprifoliaceae
2    Plantaginaceae
3    Plantaginaceae
4         Violaceae
5           Poaceae
6        Asteraceae
Name: family, dtype: object

In [143]:
i = 259
j = 0;
while i < 266:
    df.at[i, "Family"] = gbif_data_missing["family"][j]
    i = i + 1
    j = j + 1
print("complete")
df.tail(7)

complete


Unnamed: 0,Species,BE,Letzter Nachweis,Common_name,Family
261,Veronica opaca,0,1881,Glanzloser Ehrenpreis,Plantaginaceae
262,Veronica teucrium,0,1881,Großer Ehrenpreis,Plantaginaceae
263,Viola stagnina,0,1984,Gräben-Veilchen,Violaceae
264,Vulpia bromoides,0,1992,Trespen-Federschwingel,Poaceae
265,Xanthium strumarium,0,1986,Gemeine Spitzklette,Asteraceae


In [144]:
df.Family.isna().sum()

0

In [145]:
# renaming columns
df.rename({
    "Common_name": "Deutscher_Name",
    "Letzter Nachweis": "Letzter_Nachweis"
}, 
    axis='columns',
    inplace = True
)
df

Unnamed: 0,Species,BE,Letzter_Nachweis,Deutscher_Name,Family
0,Achillea salicifolia,0,1999,Weidenblatt-Schafgarbe,Asteraceae
1,Agrostemma githago,0,1989,Korn-Rade,Caryophyllaceae
2,Alchemilla plicata,0,1990,Gefalteter Frauenmantel,Rosaceae
3,Alchemilla vulgaris,0,1969,Spitzlappiger Frauenmantel,Rosaceae
4,Alisma gramineum,0,1950,Grasblättriger Froschlöffel,Alismataceae
...,...,...,...,...,...
261,Veronica opaca,0,1881,Glanzloser Ehrenpreis,Plantaginaceae
262,Veronica teucrium,0,1881,Großer Ehrenpreis,Plantaginaceae
263,Viola stagnina,0,1984,Gräben-Veilchen,Violaceae
264,Vulpia bromoides,0,1992,Trespen-Federschwingel,Poaceae


In [146]:
# Add common names
#df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Rote Liste und Gesamtartenliste der etablierten Farn- und Blütenpflanzen von Berlin"
# Adding Letzter_Nachweis column
#df["Letzter_Nachweis"] = ""
df.head()

Unnamed: 0,Species,BE,Letzter_Nachweis,Deutscher_Name,Family,Reference
0,Achillea salicifolia,0,1999,Weidenblatt-Schafgarbe,Asteraceae,Rote Liste und Gesamtartenliste der etablierte...
1,Agrostemma githago,0,1989,Korn-Rade,Caryophyllaceae,Rote Liste und Gesamtartenliste der etablierte...
2,Alchemilla plicata,0,1990,Gefalteter Frauenmantel,Rosaceae,Rote Liste und Gesamtartenliste der etablierte...
3,Alchemilla vulgaris,0,1969,Spitzlappiger Frauenmantel,Rosaceae,Rote Liste und Gesamtartenliste der etablierte...
4,Alisma gramineum,0,1950,Grasblättriger Froschlöffel,Alismataceae,Rote Liste und Gesamtartenliste der etablierte...


In [147]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

Unnamed: 0,Species,Family,BE,Letzter_Nachweis,Deutscher_Name,Reference
0,Achillea salicifolia,Asteraceae,0,1999,Weidenblatt-Schafgarbe,Rote Liste und Gesamtartenliste der etablierte...
1,Agrostemma githago,Caryophyllaceae,0,1989,Korn-Rade,Rote Liste und Gesamtartenliste der etablierte...
2,Alchemilla plicata,Rosaceae,0,1990,Gefalteter Frauenmantel,Rote Liste und Gesamtartenliste der etablierte...
3,Alchemilla vulgaris,Rosaceae,0,1969,Spitzlappiger Frauenmantel,Rote Liste und Gesamtartenliste der etablierte...
4,Alisma gramineum,Alismataceae,0,1950,Grasblättriger Froschlöffel,Rote Liste und Gesamtartenliste der etablierte...
5,Allium lusitanicum,Amaryllidaceae,0,1859,Berg-Lauch,Rote Liste und Gesamtartenliste der etablierte...
6,Alopecurus myosuroides,Poaceae,0,1968,Acker-Fuchsschwanz,Rote Liste und Gesamtartenliste der etablierte...
7,Althaea officinalis,Malvaceae,0,1859,Echter Eibisch,Rote Liste und Gesamtartenliste der etablierte...
8,Ammophila arenaria,Poaceae,0,1859,Gemeiner Strandhafer,Rote Liste und Gesamtartenliste der etablierte...
9,Anagallis minima,Primulaceae,0,1985,Acker-Kleinling,Rote Liste und Gesamtartenliste der etablierte...


In [148]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/28_Vascular_plants.csv', 
          index=False)

# 29: 

In [None]:
# Updated export location
# export the dataframe as CSV
df2.to_csv('../../Transformation/Raw_csv/Mammalia.csv', 
          index = False
         )

In [None]:
# Read file into dataframe

df = pd.read_csv('Name.csv')
df.head(10)

In [None]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Bestand",
                   "Trend_lang",
                   "Trend_kurz",
                   "RF",
                   "BB",
                   "D",
                   "GfU", 
                   "Vorzugs_habitat",
                   "Ökolog_Typ"
                  ], inplace = True)
df.head()

In [None]:
# Editing species column based on binomial nomenclature

df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
df.Species = df1[0].str.cat(df1[1], sep = " ")
df.head(10)

In [None]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

In [None]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

In [None]:
# Add common names
df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Ref Name"
# Adding Letzter_Nachweis column
df["Letzter_Nachweis"] = ""
df.head()

In [None]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

In [None]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/22_name.csv', 
          index=False)

In [None]:
# Updated export location
# export the dataframe as CSV
df2.to_csv('../../Transformation/Raw_csv/Mammalia.csv', 
          index = False
         )

In [None]:
# Read file into dataframe

df = pd.read_csv('Name.csv')
df.head(10)

In [None]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Bestand",
                   "Trend_lang",
                   "Trend_kurz",
                   "RF",
                   "BB",
                   "D",
                   "GfU", 
                   "Vorzugs_habitat",
                   "Ökolog_Typ"
                  ], inplace = True)
df.head()

In [None]:
# Editing species column based on binomial nomenclature

df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
df.Species = df1[0].str.cat(df1[1], sep = " ")
df.head(10)

In [None]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

In [None]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

In [None]:
# Add common names
df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Ref Name"
# Adding Letzter_Nachweis column
df["Letzter_Nachweis"] = ""
df.head()

In [None]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

In [None]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/22_name.csv', 
          index=False)

In [None]:
# Updated export location
# export the dataframe as CSV
df2.to_csv('../../Transformation/Raw_csv/Mammalia.csv', 
          index = False
         )

In [None]:
# Read file into dataframe

df = pd.read_csv('Name.csv')
df.head(10)

In [None]:
# Removing columns "Bestand","Trend_lang","Trend_kurz", "RF", "BB", "D", "GS", "GfU"
df.drop(columns = ["Bestand",
                   "Trend_lang",
                   "Trend_kurz",
                   "RF",
                   "BB",
                   "D",
                   "GfU", 
                   "Vorzugs_habitat",
                   "Ökolog_Typ"
                  ], inplace = True)
df.head()

In [None]:
# Editing species column based on binomial nomenclature

df1 = df.Species.str.split(pat=" ", n = 3, expand = True)
df.Species = df1[0].str.cat(df1[1], sep = " ")
df.head(10)

In [None]:
# Creating a dictionary and extracting GBIF data
d = {}

for i in df.Species:     
   d[i] = species.name_backbone(name=str(i), kingdom='animals')
print("Complete")

In [None]:
gbif_data = pd.DataFrame.from_dict(d, orient = "index")
gbif_data.reset_index(inplace = True)
gbif_data.columns
df["Family"] = gbif_data["family"]
df

In [None]:
# Add common names
df["Deutscher_Name"] = ""
# Add reference column
df["Reference"] = "Ref Name"
# Adding Letzter_Nachweis column
df["Letzter_Nachweis"] = ""
df.head()

In [None]:
# Rearranging the dataframe for uniformity
df = df[["Species",
         "Family", 
         "BE", 
         "Letzter_Nachweis",
         "Deutscher_Name", 
         "Reference"]]
df.head(10)

In [None]:
# Saving the extracted csv to the "Data_cleaning_step1" directory
# export the dataframe as CSV

df.to_csv('../../Transformation/Data_cleaning_step1/22_name.csv', 
          index=False)