In [1]:
import pandas as pd
from pandas.tseries.offsets import DateOffset

# Build `merged_data`: home-price columns and sea-level columns side by side,
# indexed by a 'YYYY-MM' string.

# --- Load raw inputs ---------------------------------------------------------
sea_level = pd.read_csv("inputs/sealevel.csv")
data = pd.read_csv("inputs/Zip_homes.csv")

# --- Sea level: at most 12 records per year, one per synthetic month ---------
# Keep every third record, then the first 12 of each year. groupby().head()
# keeps the original row order and replaces the per-year loop + concat.
# .copy() gives an independent frame so the column assignment below is safe.
filtered_data = sea_level[::3].groupby('Year').head(12).copy()

# The n-th retained record of a year is stamped as month n of that year.
# Assembling the timestamp from year/month/day columns is vectorized, unlike
# adding an object-dtype Series of DateOffset objects element by element.
month_of_year = filtered_data.groupby('Year').cumcount() + 1
month_start = pd.to_datetime(
    pd.DataFrame({
        'year': filtered_data['Year'],
        'month': month_of_year,
        'day': 1,
    })
)
# After this line 'Year' holds 'YYYY-MM' strings, not a bare year.
filtered_data['Year'] = month_start.dt.to_period('M').astype(str)

# No de-duplication step is needed: each year contributes at most 12 rows and
# each gets a distinct month, so the 'YYYY-MM' keys are unique by construction.
filtered_data = filtered_data.set_index('Year')

# --- Home prices: transpose so each original column becomes a row ------------
transposed_data = data.T.reset_index()
# The first transposed row supplies the new header; its first cell is expected
# to be 'RegionName' (presumably the zip-code column -- confirm against the CSV).
new_header = transposed_data.iloc[0]
transposed_data = transposed_data.iloc[1:].copy()
transposed_data.columns = new_header
# The former column names (the date strings, judging by the merged output)
# become the index, named 'Year' to match the sea-level frame.
transposed_data = (
    transposed_data
    .rename(columns={'RegionName': 'Year'})
    .set_index('Year')
)

# --- Merge on the shared index ------------------------------------------------
# Outer join keeps periods present in only one of the two sources.
merged_data = pd.merge(
    transposed_data,
    filtered_data,
    how='outer',
    left_index=True,
    right_index=True,
).rename_axis('Date')

merged_data.to_csv('inputs/merged_data.csv')

print(merged_data.head())



         10025.0  60657.0  10023.0  60614.0  79936.0  10002.0  60640.0  \
Date                                                                     
1993-01      NaN      NaN      NaN      NaN      NaN      NaN      NaN   
1993-02      NaN      NaN      NaN      NaN      NaN      NaN      NaN   
1993-03      NaN      NaN      NaN      NaN      NaN      NaN      NaN   
1993-04      NaN      NaN      NaN      NaN      NaN      NaN      NaN   
1993-05      NaN      NaN      NaN      NaN      NaN      NaN      NaN   

         77084.0  94109.0  75070.0  ...  76005.0  81225.0  \
Date                                ...                     
1993-01      NaN      NaN      NaN  ...      NaN      NaN   
1993-02      NaN      NaN      NaN  ...      NaN      NaN   
1993-03      NaN      NaN      NaN  ...      NaN      NaN   
1993-04      NaN      NaN      NaN  ...      NaN      NaN   
1993-05      NaN      NaN      NaN  ...      NaN      NaN   

         TotalWeightedObservations  GMSL_noGIA  StdDe

In [2]:
# Restrict the merged table to the 2010-01 .. 2017-09 window. The index holds
# 'YYYY-MM' strings and .loc label slicing includes both end points.
window_start, window_end = '2010-01', '2017-09'
filtered_merged_data = merged_data.loc[window_start:window_end]

# Preview the first rows, then persist the windowed table (index included).
print(filtered_merged_data.head())

filtered_merged_data.to_csv('inputs/filtered_merged_data.csv')

          10025.0   60657.0  10023.0  60614.0   79936.0  10002.0  60640.0  \
Date                                                                        
2010-01  775000.0  349900.0      NaN      NaN  134700.0      NaN      NaN   
2010-02  725000.0  349000.0      NaN      NaN  130000.0      NaN      NaN   
2010-03  762500.0  339619.5      NaN      NaN  129900.0      NaN      NaN   
2010-04  725000.0  349000.0      NaN      NaN  129900.0      NaN      NaN   
2010-05  732500.0  334500.0      NaN      NaN  131450.0      NaN      NaN   

          77084.0  94109.0   75070.0  ...  76005.0  81225.0  \
Date                                  ...                     
2010-01  119900.0      NaN  191315.0  ...      NaN      NaN   
2010-02  119850.0      NaN  194900.0  ...      NaN      NaN   
2010-03  121950.0      NaN  190995.0  ...      NaN      NaN   
2010-04  120000.0      NaN  195000.0  ...      NaN      NaN   
2010-05  120000.0      NaN  194950.0  ...      NaN      NaN   

         TotalWeig

In [3]:
import pandas as pd

# Zip codes under study, written as floats because that is how they appear as
# column labels in the merged table.
Coastal = [
    94015.0, 93950.0, 93109.0, 33137.0, 33129.0,
    33131.0, 33308.0, 33062.0, 70124.0, 70122.0,
    70126.0, 77505.0, 77058.0, 77015.0, 10069.0,
    10010.0, 10305.0, 10314.0, 11214.0, 11209.0,
]
Inland = [
    94014.0, 93940.0, 93108.0, 33127.0, 33145.0,
    33130.0, 33060.0, 70118.0, 70119.0, 77504.0,
    77062.0, 10023.0, 10003.0, 10304.0, 11204.0,
]
# Not used in this cell; defined here for later cells.
Combined = [
    94014.0, 94015.0, 93940.0, 93950.0, 93108.0, 93109.0,
    77504.0, 77505.0, 33127.0, 33137.0, 33145.0, 33129.0,
    33130.0, 33131.0, 33309.0, 33308.0, 33060.0, 33062.0,
    70118.0, 70124.0, 70119.0, 70122.0, 70116.0, 70126.0,
    77062.0, 77058.0, 77020.0, 77015.0, 77078.0, 77044.0,
    10304.0, 10305.0, 10306.0, 10314.0, 11204.0, 11214.0,
    11219.0, 11209.0, 10023.0, 10069.0, 10003.0, 10010.0,
]
# Sea-level columns carried along next to the zip-code columns.
additional_columns = [
    "TotalWeightedObservations", "GMSL_noGIA", "StdDevGMSL_noGIA", "SmoothedGSML_noGIA",
    "GMSL_GIA", "StdDevGMSL_GIA", "SmoothedGSML_GIA", "SmoothedGSML_GIA_sigremoved",
]

# Column order in the output: coastal zips, inland zips, sea-level columns.
selected_columns = [*Coastal, *Inland, *additional_columns]

# Keep only those columns and turn the 'Date' index into a regular column.
filtered_zip = filtered_merged_data.loc[:, selected_columns].reset_index()
filtered_zip.to_csv('inputs/filtered_zip.csv', index=False)
print(filtered_zip)

       Date   94015.0   93950.0    93109.0   33137.0   33129.0   33131.0  \
0   2010-01       NaN  749900.0        NaN       NaN       NaN  300000.0   
1   2010-02       NaN  699000.0        NaN       NaN       NaN  305000.0   
2   2010-03       NaN  699000.0        NaN       NaN       NaN  299000.0   
3   2010-04       NaN  670000.0        NaN       NaN       NaN  298000.0   
4   2010-05       NaN  675000.0        NaN       NaN       NaN  319000.0   
..      ...       ...       ...        ...       ...       ...       ...   
88  2017-05  833944.0  874944.0  1349000.0  499888.0  558250.0  470000.0   
89  2017-06  799999.0  898500.0  1299000.0  515350.0  545000.0  475000.0   
90  2017-07  849000.0  935000.0  1400000.0  539000.0  549000.0  470000.0   
91  2017-08  848888.0  945000.0  1370000.0  549000.0  539777.0  485000.0   
92  2017-09  838944.0  938500.0  1249000.0  549000.0  539000.0  485000.0   

     33308.0   33062.0   70124.0  ...   10304.0   11204.0  \
0   356200.0  339000.0  28

In [4]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression


# Fit one simple linear regression per column: column value ~ GMSL_noGIA.
# Every column other than the predictor is regressed, which includes the
# remaining sea-level columns as well as the zip-code price columns.
predictor = 'GMSL_noGIA'

# errors='ignore' keeps this cell re-runnable: on a second run 'Date' is
# already gone and a plain drop would raise KeyError.
filtered_zip = filtered_zip.drop(columns=['Date'], errors='ignore')

results = {}
skipped = []  # columns without enough paired observations to fit a line

for col in filtered_zip.columns:

    if col == predictor:
        continue

    # Keep only rows where BOTH predictor and response are present; filtering
    # on the response alone lets a NaN predictor through to fit(), which raises.
    paired = filtered_zip[[predictor, col]].dropna()

    # A line (and its R-squared) needs at least two observations.
    if len(paired) < 2:
        skipped.append(col)
        continue

    # scikit-learn expects 2-D arrays of shape (n_samples, n_features/targets).
    x = paired[predictor].to_numpy(dtype=float).reshape(-1, 1)
    y = paired[col].to_numpy(dtype=float).reshape(-1, 1)

    model = LinearRegression().fit(x, y)

    results[col] = {
        'coefficient': model.coef_[0][0],
        'intercept': model.intercept_[0],
        'r_squared': model.score(x, y)  # in-sample R-squared
    }

for label, result in results.items():
    print(f"{label}: Coefficient: {result['coefficient']:.2f}, Intercept: {result['intercept']:.2f}, R-squared: {result['r_squared']:.2f}")

if skipped:
    print(f"Skipped (fewer than 2 paired observations): {skipped}")

94015.0: Coefficient: 8094.92, Intercept: 449272.43, R-squared: 0.58
93950.0: Coefficient: 10520.17, Intercept: 499163.57, R-squared: 0.74
93109.0: Coefficient: 3842.68, Intercept: 1199154.54, R-squared: 0.05
33137.0: Coefficient: 856.47, Intercept: 518485.85, R-squared: 0.01
33129.0: Coefficient: 5410.10, Intercept: 329368.11, R-squared: 0.74
33131.0: Coefficient: 4601.97, Intercept: 323855.24, R-squared: 0.65
33308.0: Coefficient: 2931.93, Intercept: 291626.75, R-squared: 0.63
33062.0: Coefficient: 3303.70, Intercept: 264029.81, R-squared: 0.78
70124.0: Coefficient: 6042.30, Intercept: 235853.79, R-squared: 0.78
70122.0: Coefficient: -170.50, Intercept: 217812.84, R-squared: 0.00
70126.0: Coefficient: -144.64, Intercept: 120712.98, R-squared: 0.02
77505.0: Coefficient: 1783.22, Intercept: 148888.45, R-squared: 0.59
77058.0: Coefficient: 1779.23, Intercept: 151581.49, R-squared: 0.19
77015.0: Coefficient: 1325.01, Intercept: 74853.67, R-squared: 0.67
10069.0: Coefficient: -9674.39, In

In [16]:
import pandas as pd

# Zip-code groups (floats, matching the column labels of the merged table).
Coastal = [
    94015.0, 93950.0, 93109.0, 33137.0, 33129.0,
    33131.0, 33308.0, 33062.0, 70124.0, 70122.0,
    70126.0, 77505.0, 77058.0, 77015.0, 10069.0,
    10010.0, 10305.0, 10314.0, 11214.0, 11209.0,
]
Inland = [
    94014.0, 93940.0, 93108.0, 33127.0, 33145.0,
    33130.0, 33060.0, 70118.0, 70119.0, 77504.0,
    77062.0, 10023.0, 10003.0, 10304.0, 11204.0,
]
# Not used in this cell.
Combined = [
    94014.0, 94015.0, 93940.0, 93950.0, 93108.0, 93109.0,
    77504.0, 77505.0, 33127.0, 33137.0, 33145.0, 33129.0,
    33130.0, 33131.0, 33309.0, 33308.0, 33060.0, 33062.0,
    70118.0, 70124.0, 70119.0, 70122.0, 70116.0, 70126.0,
    77062.0, 77058.0, 77020.0, 77015.0, 77078.0, 77044.0,
    10304.0, 10305.0, 10306.0, 10314.0, 11204.0, 11214.0,
    11219.0, 11209.0, 10023.0, 10069.0, 10003.0, 10010.0,
]
additional_columns = [
    "TotalWeightedObservations", "GMSL_noGIA", "StdDevGMSL_noGIA", "SmoothedGSML_noGIA",
    "GMSL_GIA", "StdDevGMSL_GIA", "SmoothedGSML_GIA", "SmoothedGSML_GIA_sigremoved",
]

selected_columns = [*Coastal, *Inland, *additional_columns]

# Date becomes an ordinary column so that it survives the transpose as a row.
filtered_zip = filtered_merged_data.loc[:, selected_columns].reset_index()

# One row per selected column; the old column labels move into the first column.
transposed_filtered_zip = filtered_zip.T.reset_index()

# The first row is the former 'Date' column: promote it to the header, then
# remove it from the data rows.
date_header = transposed_filtered_zip.iloc[0]
transposed_filtered_zip.columns = date_header
first_row_label = transposed_filtered_zip.index[0]
transposed_filtered_zip = transposed_filtered_zip.drop(first_row_label)

# The leading header cell reads 'Date' but that column holds the zip codes
# (and the sea-level column names), so call it 'Zip'.
transposed_filtered_zip = transposed_filtered_zip.rename(columns={'Date': 'Zip'})

# Persist the wide table and preview it.
transposed_filtered_zip.to_csv('inputs/transposed_filtered_zip.csv', index=False)
print(transposed_filtered_zip.head())

0      Zip   2010-01   2010-02   2010-03   2010-04   2010-05   2010-06  \
1  94015.0       NaN       NaN       NaN       NaN       NaN       NaN   
2  93950.0  749900.0  699000.0  699000.0  670000.0  675000.0  642450.0   
3  93109.0       NaN       NaN       NaN       NaN       NaN       NaN   
4  33137.0       NaN       NaN       NaN       NaN       NaN       NaN   
5  33129.0       NaN       NaN       NaN       NaN       NaN       NaN   

0   2010-07   2010-08   2010-09  ...    2016-12    2017-01    2017-02  \
1       NaN       NaN       NaN  ...   773388.5   768000.0   759000.0   
2  635000.0  649000.0  625000.0  ...   998000.0   950000.0   949000.0   
3       NaN       NaN       NaN  ...  1349900.0  1449000.0  1499000.0   
4       NaN       NaN       NaN  ...   529950.0   527000.0   520000.0   
5       NaN       NaN       NaN  ...   539900.0   545000.0   529000.0   

0    2017-03    2017-04    2017-05    2017-06    2017-07    2017-08    2017-09  
1   749000.0   799000.0   833944.0 

In [18]:
# Build the wide (one row per zip) table again and tag each row with a
# coastal flag and a pair id. Relies on Coastal, Inland, Combined and
# additional_columns being defined by an earlier cell.
selected_columns = Coastal + Inland + additional_columns

filtered_zip = filtered_merged_data.loc[:, selected_columns].reset_index()

# Transpose: one row per selected column, old column labels in the first column.
transposed_filtered_zip = filtered_zip.transpose().reset_index()

# Promote the first row (the former 'Date' column) to the header and drop it
# from the data. Renaming the reset_index column beforehand would be pointless:
# this assignment replaces every column name.
transposed_filtered_zip.columns = transposed_filtered_zip.iloc[0]
transposed_filtered_zip = transposed_filtered_zip.drop(transposed_filtered_zip.index[0])

# The leading header cell reads 'Date' but the column holds the zip codes.
transposed_filtered_zip = transposed_filtered_zip.rename(columns={'Date': 'Zip'})

# Coastal flag: 1 for zips listed in Coastal, 0 for everything else.
# NOTE(review): the sea-level rows (additional_columns) also receive 0 here and
# Pair 0 below -- confirm downstream code does not read them as inland zips.
coastal_zips = set(Coastal)
transposed_filtered_zip['Coastal/Inland'] = transposed_filtered_zip['Zip'].map(
    lambda zip_code: 1 if zip_code in coastal_zips else 0
)

# Combined is read as consecutive (inland, coastal) pairs; fail early with a
# clear message instead of an IndexError halfway through the labelling.
if len(Combined) % 2:
    raise ValueError(
        f"Combined must hold (inland, coastal) pairs; got an odd length of {len(Combined)}"
    )

# Pair ids start at 1; 0 marks rows that belong to no pair. If a zip appears in
# more than one pair, the later pair wins.
pair_of = {}
for pair_index, (inland_zip, coastal_zip) in enumerate(zip(Combined[::2], Combined[1::2]), start=1):
    pair_of[inland_zip] = pair_index
    pair_of[coastal_zip] = pair_index

# One dictionary lookup per row instead of two full-frame scans per pair.
transposed_filtered_zip['Pair'] = transposed_filtered_zip['Zip'].map(
    lambda zip_code: pair_of.get(zip_code, 0)
)

# Save the labelled table (this is the same path the unlabelled version is
# written to) and preview it.
transposed_filtered_zip.to_csv('inputs/transposed_filtered_zip.csv', index=False)
print(transposed_filtered_zip.head())

0      Zip   2010-01   2010-02   2010-03   2010-04   2010-05   2010-06  \
1  94015.0       NaN       NaN       NaN       NaN       NaN       NaN   
2  93950.0  749900.0  699000.0  699000.0  670000.0  675000.0  642450.0   
3  93109.0       NaN       NaN       NaN       NaN       NaN       NaN   
4  33137.0       NaN       NaN       NaN       NaN       NaN       NaN   
5  33129.0       NaN       NaN       NaN       NaN       NaN       NaN   

0   2010-07   2010-08   2010-09  ...    2017-02    2017-03    2017-04  \
1       NaN       NaN       NaN  ...   759000.0   749000.0   799000.0   
2  635000.0  649000.0  625000.0  ...   949000.0   849000.0   854000.0   
3       NaN       NaN       NaN  ...  1499000.0  1425000.0  1382000.0   
4       NaN       NaN       NaN  ...   520000.0   520000.0   515000.0   
5       NaN       NaN       NaN  ...   529000.0   547000.0   558900.0   

0    2017-05    2017-06    2017-07    2017-08    2017-09 Coastal/Inland Pair  
1   833944.0   799999.0   849000.0   