In [5]:
# import libraries
import math
import os
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [4]:
# load data
# Directory holding the exported CSVs. Set the OSMIA_DATA_DIR environment variable
# to point at another location; the default is the path this notebook was
# originally run against.
DATA_DIR = Path(os.environ.get(
    'OSMIA_DATA_DIR',
    '/Users/edwardamoah/Documents/GitHub/OsmiaPopModel/output',
))

tmean = pd.read_csv(DATA_DIR / 'tmean_prism_pennsylvania_data.csv')
ppt = pd.read_csv(DATA_DIR / 'ppt_prism_pennsylvania_data.csv')
forage = pd.read_csv(DATA_DIR / 'foraging_quality_pennsylvania_data.csv')

In [6]:

# Columns whose name contains "Forage" are relabelled with the first run of
# digits found in their second "_"-separated token; getGridQuality later looks
# these columns up by str(year). All other columns keep their names.
cols = forage.columns.tolist()
new_cols = [
    re.findall(r'\d+', name.split("_")[1])[0] if "Forage" in name else name
    for name in cols
]

# rename columns
forage = forage.rename(columns=dict(zip(cols, new_cols)))

# create grid_id column: "<col>_<row>", the key used by the lookup helpers
forage["grid_id"] = forage["col"].astype(str) + "_" + forage["row"].astype(str)

# Parallel lists: entry i of each describes the same grid cell.
possible_cols = forage["col"].tolist()
possible_rows = forage["row"].tolist()

def get_adjacent_cols_rows(col, row, possible_cols=possible_cols, possible_rows=possible_rows):
    """
    List the grid cells that touch (col, row), excluding the cell itself.

    col: int, column number of the centre cell
    row: int, row number of the centre cell
    possible_cols, possible_rows: parallel lists; entry i of each is one candidate cell

    output:
    list of (col, row) tuples whose column and row each differ from the
    centre by at most 1, in the order the candidates are listed
    """
    return [
        (c, r)
        for c, r in zip(possible_cols, possible_rows)
        if abs(c - col) <= 1 and abs(r - row) <= 1 and not (c == col and r == row)
    ]



def getGridQuality(col, row, year, forage=forage):
    """
    Read the value stored for one grid cell in the column named str(year).

    The cell is matched on grid_id == "<col>_<row>" and the first matching
    row is used; indexing raises IndexError when no row matches.
    """
    is_cell = forage["grid_id"] == "_".join((str(col), str(row)))
    return forage.loc[is_cell, str(year)].values[0]


def getGridForageQuality(col, row, year):
    '''
    Get forage quality for a given grid cell, falling back to its neighbours
    when the cell's own value is missing.

    col: int, column number
    row: int, row number
    year: int, year

    output:
    float [0,233]: the cell's own value when it is not NaN, otherwise the mean
    of the non-NaN values of the adjacent cells. NaN when the cell and every
    adjacent cell are missing.
    '''
    own_quality = getGridQuality(col, row, year)
    if not math.isnan(own_quality):
        return own_quality

    # Own value is missing: gather whatever the surrounding cells have.
    neighbour_quality = [
        getGridQuality(c, r, year) for c, r in get_adjacent_cols_rows(col, row)
    ]
    valid_quality = [q for q in neighbour_quality if not math.isnan(q)]

    if not valid_quality:
        # np.mean([]) would also give NaN, but with a RuntimeWarning; be explicit.
        return float("nan")
    return np.mean(valid_quality)




################# tmean data #################

# Columns whose name contains "PRISM_tmean" are relabelled with their
# second-to-last "_"-separated token (getTmean looks columns up by a
# YYYYMMDD string). All other columns keep their names.
cols = tmean.columns.tolist()
new_cols = [name.split("_")[-2] if "PRISM_tmean" in name else name for name in cols]

# Rename, then add the "<col>_<row>" key used by getTmean.
tmean = tmean.rename(columns=dict(zip(cols, new_cols))).assign(
    grid_id=lambda frame: frame["col"].astype(str) + "_" + frame["row"].astype(str)
)

def getTmean(col, row, day, month, year, tmean=tmean):
    '''
    Look up the mean temperature of one grid cell on one date.

    col: int, column number
    row: int, row number
    day: int, day of the month
    month: int, month of the year
    year: int, year
    tmean: pd.DataFrame, temperature data with a grid_id column and one
           column per date named YYYYMMDD

    output:
    float, temperature in degrees Celsius (first row matching the cell)
    '''
    # Zero-pad single-digit months and days so the key has the form YYYYMMDD.
    month_part = "0" + str(month) if month < 10 else str(month)
    day_part = "0" + str(day) if day < 10 else str(day)
    date_key = "".join((str(year), month_part, day_part))

    is_cell = tmean["grid_id"] == "_".join((str(col), str(row)))
    return tmean.loc[is_cell, date_key].values[0]

############### ppt data #################

# Columns whose name contains "PRISM_ppt" are relabelled with their
# second-to-last "_"-separated token (getPpt looks columns up by a
# YYYYMMDD string). All other columns keep their names.
cols = ppt.columns.tolist()
new_cols = [name.split("_")[-2] if "PRISM_ppt" in name else name for name in cols]

# Rename, then add the "<col>_<row>" key used by getPpt.
ppt = ppt.rename(columns=dict(zip(cols, new_cols))).assign(
    grid_id=lambda frame: frame["col"].astype(str) + "_" + frame["row"].astype(str)
)

def getPpt(col, row, day, month, year, ppt=ppt):
    '''
    Look up the precipitation of one grid cell on one date.

    col: int, column number
    row: int, row number
    day: int, day of the month
    month: int, month of the year
    year: int, year
    ppt: pd.DataFrame, precipitation data with a grid_id column and one
         column per date named YYYYMMDD

    output:
    float, precipitation in mm (first row matching the cell)
    '''
    # Zero-pad single-digit months and days so the key has the form YYYYMMDD.
    month_part = "0" + str(month) if month < 10 else str(month)
    day_part = "0" + str(day) if day < 10 else str(day)
    date_key = "".join((str(year), month_part, day_part))

    is_cell = ppt["grid_id"] == "_".join((str(col), str(row)))
    return ppt.loc[is_cell, date_key].values[0]

In [10]:
# Analysis

In [11]:
# Study years, 2008 through 2023 inclusive.
years = list(range(2008, 2024))

In [13]:
# Preview the first rows of the temperature frame after the column rename.
tmean.head()

Unnamed: 0.1,Unnamed: 0,col,row,lon,lat,20240401,20240402,20240403,20240404,20240405,...,20230922,20230923,20230924,20230925,20230926,20230927,20230928,20230929,20230930,grid_id
0,1,1083,184,-79.875,42.25,5.9265,5.4837,8.5025,3.6137,3.8873,...,15.345699,16.5809,15.1833,15.098599,16.1915,16.3295,14.4035,15.7274,15.281899,1083_184
1,2,1084,184,-79.833333,42.25,6.03,5.7104,8.5476,3.8709,4.096,...,15.432199,16.6245,15.2395,15.1629,16.2728,16.381599,14.5158,15.7701,15.351999,1084_184
2,3,1085,184,-79.791667,42.25,6.0538,6.0169,8.7476,4.0802,4.2606,...,15.810699,16.7824,15.483399,15.3375,16.5418,16.4575,14.8427,15.8241,15.6317,1085_184
3,4,1086,184,-79.75,42.25,6.0455,6.3161,8.892,4.3004,4.2719,...,16.052999,16.7612,15.656099,15.408899,16.6777,16.367399,15.032499,15.7118,15.7933,1086_184
4,5,1081,185,-79.958333,42.208333,5.6563,5.8635,8.7472,3.6509,3.307,...,16.0797,16.813099,15.6078,15.309099,16.473,16.229399,14.773,15.531099,15.654099,1081_185


In [14]:
# Grid coordinates of every row in the temperature frame.
data = tmean.loc[:, ['col', 'row']]

In [16]:
# Parallel coordinate lists read as globals by getPennsylvaniaPpt / getPennsylvaniaTmean.
# Note: this rebinds `cols`, which earlier cells used for column-name lists.
cols = data["col"].tolist()
rows = data["row"].tolist()

In [18]:
def getPennsylvaniaPpt(day, month, year):
    """
    Average precipitation over all grid cells for one date.

    Calls getPpt once per (col, row) pair from the module-level `cols` and
    `rows` lists and returns np.mean of the results. Each getPpt call filters
    the whole precipitation frame, so the cost grows with the square of the
    number of cells.
    """
    return np.mean([getPpt(c, r, day, month, year) for c, r in zip(cols, rows)])


def getPennsylvaniaTmean(day, month, year):
    """
    Average mean temperature over all grid cells for one date.

    Calls getTmean once per (col, row) pair from the module-level `cols` and
    `rows` lists and returns np.mean of the results. Each getTmean call filters
    the whole temperature frame, so the cost grows with the square of the
    number of cells.
    """
    return np.mean([getTmean(c, r, day, month, year) for c, r in zip(cols, rows)])


In [19]:
import pandas as pd

# Inclusive bounds of the analysis period.
start_date = '2008-01-01'
end_date = '2023-12-31'

# One 'YYYY-MM-DD' string per calendar day between the bounds.
date_range = [
    stamp.strftime('%Y-%m-%d')
    for stamp in pd.date_range(start=start_date, end=end_date, freq='D')
]

date_range


['2008-01-01',
 '2008-01-02',
 '2008-01-03',
 '2008-01-04',
 '2008-01-05',
 '2008-01-06',
 '2008-01-07',
 '2008-01-08',
 '2008-01-09',
 '2008-01-10',
 '2008-01-11',
 '2008-01-12',
 '2008-01-13',
 '2008-01-14',
 '2008-01-15',
 '2008-01-16',
 '2008-01-17',
 '2008-01-18',
 '2008-01-19',
 '2008-01-20',
 '2008-01-21',
 '2008-01-22',
 '2008-01-23',
 '2008-01-24',
 '2008-01-25',
 '2008-01-26',
 '2008-01-27',
 '2008-01-28',
 '2008-01-29',
 '2008-01-30',
 '2008-01-31',
 '2008-02-01',
 '2008-02-02',
 '2008-02-03',
 '2008-02-04',
 '2008-02-05',
 '2008-02-06',
 '2008-02-07',
 '2008-02-08',
 '2008-02-09',
 '2008-02-10',
 '2008-02-11',
 '2008-02-12',
 '2008-02-13',
 '2008-02-14',
 '2008-02-15',
 '2008-02-16',
 '2008-02-17',
 '2008-02-18',
 '2008-02-19',
 '2008-02-20',
 '2008-02-21',
 '2008-02-22',
 '2008-02-23',
 '2008-02-24',
 '2008-02-25',
 '2008-02-26',
 '2008-02-27',
 '2008-02-28',
 '2008-02-29',
 '2008-03-01',
 '2008-03-02',
 '2008-03-03',
 '2008-03-04',
 '2008-03-05',
 '2008-03-06',
 '2008-03-

In [20]:
# One row per day, with the date kept as a 'YYYY-MM-DD' string.
df = pd.DataFrame({'date': date_range})

In [21]:
# Parse the date strings once, then split them into the integer parts the lookups take.
parsed_dates = pd.to_datetime(df['date'])
df['day'] = parsed_dates.dt.day
df['month'] = parsed_dates.dt.month
df['year'] = parsed_dates.dt.year

# NOTE(review): the tmean preview shows date columns such as 20240401 and 20230930,
# which suggests only part of each year is present. If so, dates outside that
# window will raise KeyError in getPpt / getTmean. Confirm the coverage first.
df['ppt'] = [
    getPennsylvaniaPpt(d, m, y)
    for d, m, y in zip(df['day'], df['month'], df['year'])
]
df['tmean'] = [
    getPennsylvaniaTmean(d, m, y)
    for d, m, y in zip(df['day'], df['month'], df['year'])
]