## Macrobenthic Data Reshaping for Timeline Map

In [1]:
! pip install pandas
! pip install numpy



In [2]:
import pandas as pd
import numpy as np

In [3]:
ben_sqi = pd.read_csv('../original_data/BENTHIC_MACROINVERTEBRATES_SQI.csv')

In [4]:
ben_sqi.head()

Unnamed: 0,FIELDID,Latitude,Longitude,F22 SQI,SP22 SQI,F21 SQI,SP21 SQI,F20 SQI,SP20 SQI,F19 SQI,...,SP05 SQI,F04 SQI,SP04 SQI,F03 SQI,SP03 SQI,F02 SQI,SP02 SQI,F01 SQI,SP01 SQI,Unnamed: 47
0,Bish1,42.482378,-83.460553,,,,,,,,...,,,15.0,,17.0,,21.0,,22.0,
1,Bish2,42.47131,-83.45151,38.0,,21.0,28.0,20.0,,23.0,...,,,,,,,,,,
2,Evan1,42.504517,-83.233388,,,,,,,,...,,,,,,,,,,
3,Evan2,42.472955,-83.247905,20.0,16.0,14.0,12.0,21.0,,16.0,...,,,,,,,,,,
4,Evan3,42.459278,-83.268639,,,2.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [5]:
ben_sqi.drop(columns=['Unnamed: 47'], inplace=True)

In [6]:
ben_sqi_new = ben_sqi
ben_sqi_new.head()

Unnamed: 0,FIELDID,Latitude,Longitude,F22 SQI,SP22 SQI,F21 SQI,SP21 SQI,F20 SQI,SP20 SQI,F19 SQI,...,F05 SQI,SP05 SQI,F04 SQI,SP04 SQI,F03 SQI,SP03 SQI,F02 SQI,SP02 SQI,F01 SQI,SP01 SQI
0,Bish1,42.482378,-83.460553,,,,,,,,...,,,,15.0,,17.0,,21.0,,22.0
1,Bish2,42.47131,-83.45151,38.0,,21.0,28.0,20.0,,23.0,...,,,,,,,,,,
2,Evan1,42.504517,-83.233388,,,,,,,,...,,,,,,,,,,
3,Evan2,42.472955,-83.247905,20.0,16.0,14.0,12.0,21.0,,16.0,...,,,,,,,,,,
4,Evan3,42.459278,-83.268639,,,2.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [7]:
### ADD THE NEWEST COLUMN NAME AS A STRING TO THIS LIST OF COLUMNS ###
unused_cols = ['FIELDID', 'Latitude', 'Longitude']

In [8]:
benthics_sqi = pd.DataFrame(columns=['FIELDID', 'Latitude', 'Longitude', 'Date', 'Season', 'SQI'], index=['FIELDID'])
for index, row in ben_sqi_new.iterrows():
    for col in ben_sqi_new.columns:
        # pull out sampling season
        if col in unused_cols:
            continue
        else:
            if col[0] == 'F':
                season = 'Fall'
                date = '10/15/' + col[1:3]
                # convert to datetime
                date_time = pd.to_datetime(date)
            else:
                season = 'Spring'
                date = '04/15/' + col[2:4]
                # convert to datetime
                date_time = pd.to_datetime(date)
            # pull out SQI value
            sqi = row[col]
            # compile row dict
            info = {'FIELDID':row['FIELDID'], 'Latitude':row['Latitude'], 'Longitude':row['Longitude'], 'Date':date_time, 'Season':season, 'SQI':sqi}
            info_df = pd.DataFrame(info, index=['Date'])
        # create new row for each sampling event
        benthics_sqi = pd.concat([benthics_sqi, info_df], ignore_index = True)

In [9]:
benthics_sqi.sample(5)

Unnamed: 0,FIELDID,Latitude,Longitude,Date,Season,SQI
2276,Main6,42.47882,-83.284548,2007-04-15,Spring,19.0
3290,MR-14,42.360143,-83.476375,2006-04-15,Spring,
4549,See2,42.48927,-83.40099,2014-10-15,Fall,24.0
4300,Mur2,42.59375,-83.25174,2007-04-15,Spring,25.0
337,Fel4,42.31346,-83.46471,2008-10-15,Fall,32.0


In [10]:
benthics_sqi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5589 entries, 0 to 5588
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   FIELDID    5588 non-null   object        
 1   Latitude   5588 non-null   float64       
 2   Longitude  5588 non-null   float64       
 3   Date       5588 non-null   datetime64[ns]
 4   Season     5588 non-null   object        
 5   SQI        1774 non-null   float64       
dtypes: datetime64[ns](1), float64(3), object(2)
memory usage: 262.1+ KB


In [11]:
benthics_sqi.dropna(inplace=True)

In [12]:
benthics_sqi['SQI'] = benthics_sqi['SQI'].astype(int)

In [13]:
bins = [-1, 19, 33, 48, np.inf]
names = ['Poor', 'Fair', 'Good', 'Excellent']

benthics_sqi['SQI Rating'] = pd.cut(benthics_sqi['SQI'], bins, labels=names)

In [14]:
benthics_sqi.drop_duplicates(inplace=True)

In [15]:
benthics_sqi.head()

Unnamed: 0,FIELDID,Latitude,Longitude,Date,Season,SQI,SQI Rating
38,Bish1,42.482378,-83.460553,2004-04-15,Spring,15,Poor
40,Bish1,42.482378,-83.460553,2003-04-15,Spring,17,Poor
42,Bish1,42.482378,-83.460553,2002-04-15,Spring,21,Fair
44,Bish1,42.482378,-83.460553,2001-04-15,Spring,22,Fair
45,Bish2,42.47131,-83.45151,2022-10-15,Fall,38,Good


In [16]:
benthics_sqi.to_csv('../modified_data/benthics/benthics_reshaped_full.csv')

In [17]:
benthics_sqi_fall = benthics_sqi[benthics_sqi.Season != "Spring"]
benthics_sqi_fall.head()

Unnamed: 0,FIELDID,Latitude,Longitude,Date,Season,SQI,SQI Rating
45,Bish2,42.47131,-83.45151,2022-10-15,Fall,38,Good
47,Bish2,42.47131,-83.45151,2021-10-15,Fall,21,Fair
49,Bish2,42.47131,-83.45151,2020-10-15,Fall,20,Fair
51,Bish2,42.47131,-83.45151,2019-10-15,Fall,23,Fair
55,Bish2,42.47131,-83.45151,2017-10-15,Fall,20,Fair


In [18]:
benthics_sqi_fall.to_csv('../modified_data/benthics/benthics_reshaped_fall.csv')

In [19]:
benthics_sqi_spring = benthics_sqi[benthics_sqi.Season != "Fall"]
benthics_sqi_spring.head()

Unnamed: 0,FIELDID,Latitude,Longitude,Date,Season,SQI,SQI Rating
38,Bish1,42.482378,-83.460553,2004-04-15,Spring,15,Poor
40,Bish1,42.482378,-83.460553,2003-04-15,Spring,17,Poor
42,Bish1,42.482378,-83.460553,2002-04-15,Spring,21,Fair
44,Bish1,42.482378,-83.460553,2001-04-15,Spring,22,Fair
48,Bish2,42.47131,-83.45151,2021-04-15,Spring,28,Fair


In [20]:
benthics_sqi_spring.to_csv('../modified_data/benthics/benthics_reshaped_spring.csv')