In [2]:
import pandas as pd

In [79]:

def flowerDataFrame(name, isNative):
    '''
    Build a tidy table of California occurrence records for a single species.

    Reads the tab-separated file ./data/<name>/occurrence.txt, keeps the rows whose
    scientificName starts with `name` and whose stateProvince is exactly "California"
    or "Ca", discards rows with a missing value in any remaining column, and returns
    the coordinates, month and year together with the `native` and `species` labels.

    @param name: a string with the species name; it is used both as the data sub-directory
                 and as the prefix that scientificName must start with
    @param isNative: value written to every row of the `native` column (e.g. 'yes' or 'no')
    @return: a pandas dataframe with the columns longitude, latitude, month, year, native
             and species (in that order), a fresh 0..n-1 index, and integer month/year
    '''
    # columns of the raw export that are needed, in the order they should be kept
    wanted = ['scientificName', 'decimalLongitude', 'decimalLatitude',
              'month', 'year', 'stateProvince']

    # load the full export and narrow it down straight away so the wide frame is released
    records = pd.read_csv(f"./data/{name}/occurrence.txt", sep='\t', low_memory=False)[wanted]

    # label every record with the native flag supplied by the caller
    records['native'] = isNative

    # cut scientificName down to the length of the requested name; records whose
    # truncated name differs from `name` belong to another taxon and are rejected below
    records['species'] = records['scientificName'].str.slice(stop=len(name))

    is_requested_species = records['species'] == name
    is_in_california = records['stateProvince'].isin(["California", "Ca"])

    tidy = (
        records[is_requested_species & is_in_california]
        # the two helper columns have served their purpose once the rows are filtered
        .drop(columns=['scientificName', 'stateProvince'])
        # a missing coordinate, month, year or native flag discards the whole record
        .dropna()
        .reset_index(drop=True)
        # month/year are read as floats whenever the file has gaps; no gaps remain here
        .astype({'month': int, 'year': int})
        .rename(columns={'decimalLongitude': 'longitude',
                         'decimalLatitude': 'latitude'})
    )
    return tidy

In [80]:
# California records for Lasthenia californica, flagged as a native species
lasCal = flowerDataFrame(name='Lasthenia californica', isNative='yes')
lasCal

Unnamed: 0,longitude,latitude,month,year,native,species
0,-123.967366,41.856009,5,2017,yes,Lasthenia californica
1,-121.494900,36.165900,4,1968,yes,Lasthenia californica
2,-118.648018,34.077701,3,1959,yes,Lasthenia californica
3,-118.455048,34.073825,4,1932,yes,Lasthenia californica
4,-118.859368,34.079191,3,1941,yes,Lasthenia californica
...,...,...,...,...,...,...
1358,-117.261389,32.936111,4,2010,yes,Lasthenia californica
1359,-121.780810,39.773030,2,2011,yes,Lasthenia californica
1360,-121.193373,37.091812,3,2004,yes,Lasthenia californica
1361,-119.533333,35.049999,4,1998,yes,Lasthenia californica


In [48]:
# species flagged as native: one California occurrence table each
plntgo = flowerDataFrame(name="Plantago erecta Morris", isNative='yes')
clrkiP = flowerDataFrame(name="Clarkia purpurea", isNative='yes')
clrkiB = flowerDataFrame(name="Clarkia bottae", isNative='yes')
chaenc = flowerDataFrame(name="Chaenactis glabriuscula", isNative='yes')
amsink = flowerDataFrame(name="Amsinckia menziesii", isNative='yes')

# species flagged as non-native; "Euphorbia" is a bare genus name, so every
# scientificName starting with it is kept by the prefix match
mdcgoP = flowerDataFrame(name="Medicago polymorpha", isNative='no')
cntrea = flowerDataFrame(name="Centaurea solstitialis", isNative='no')
euphba = flowerDataFrame(name="Euphorbia", isNative='no')
altrna = flowerDataFrame(name="Alternanthera philoxeroides", isNative='no')
brssTG = flowerDataFrame(name="Brassica tournefortii Gouan", isNative='no')

In [49]:
# monthly mean air temperature and rainfall from the Sedgwick csv
sdgwik = (
    pd.read_csv("./data/sedgwick.csv")
    # year and month are cut out of the Time text by position and therefore stay strings
    # (presumably Time looks like 'YYYY-MM-...'; verify against the raw file)
    .assign(year=lambda obs: obs['Time'].str[0:4],
            month=lambda obs: obs['Time'].str[5:7])
    .drop(columns=['Time', 'Sedgwick Rainfall Cumulative mm'])
    .rename(columns={'Sedgwick Rainfall mm': 'rainfall (mm)',
                     'Sedgwick Air Temp Avg degC': 'temp (C)'})
    # one row per (year, month) holding the mean of every reading in that month
    .groupby(['year', 'month'])[['temp (C)', 'rainfall (mm)']]
    .mean()
    .reset_index()
)

# constant columns attached to every row (presumably the station's location and label)
sdgwik['latitude'] = 34.6939
sdgwik['longitude'] = -120.0417
sdgwik['station'] = 'Sedgwick'
sdgwik

Unnamed: 0,year,month,temp (C),rainfall (mm),latitude,longitude,station name
0,2011,05,12.859908,0.000264,34.6939,-120.0417,Sedgwick
1,2011,06,15.809809,0.001646,34.6939,-120.0417,Sedgwick
2,2011,07,19.364899,0.000000,34.6939,-120.0417,Sedgwick
3,2011,08,18.476727,0.000000,34.6939,-120.0417,Sedgwick
4,2011,09,19.431128,0.000000,34.6939,-120.0417,Sedgwick
...,...,...,...,...,...,...,...
125,2021,10,16.048030,0.008023,34.6939,-120.0417,Sedgwick
126,2021,11,15.340034,0.000647,34.6939,-120.0417,Sedgwick
127,2021,12,9.259314,0.032148,34.6939,-120.0417,Sedgwick
128,2022,01,11.040459,0.000683,34.6939,-120.0417,Sedgwick
