In [1]:
import pandas as pd
from pandas.tseries.offsets import DateOffset

# Load the two raw inputs: the sea-level series and the housing table.
sea_level = pd.read_csv("inputs/sealevel.csv")
data = pd.read_csv("inputs/Zip_homes.csv")

# Requirement 1: Limit to 12 rows per year.
# Keep every third sea-level row, then at most the first 12 of those within
# each 'Year'. groupby().head() keeps the original row order and replaces the
# per-year loop + pd.concat, which raised when there was nothing to concatenate.
# .copy() gives an independent frame so the column assignment below is safe.
filtered_data = sea_level.iloc[::3].groupby('Year').head(12).copy()

# Transpose the housing table so the original column labels run down the rows.
# After reset_index() those labels live in a regular column, and the first row
# holds the values of the CSV's first column; that row is promoted to header.
# NOTE(review): this assumes 'RegionName' is the first column of Zip_homes.csv
# and that the remaining column labels are 'YYYY-MM' strings -- confirm.
transposed_data = data.T.reset_index()
new_header = transposed_data.iloc[0]
transposed_data = transposed_data.iloc[1:].copy()
transposed_data.columns = new_header

# The column now called 'RegionName' holds the former column labels; rename it
# to 'Year' and use it as the join key.
transposed_data = (
    transposed_data
    .rename(columns={'RegionName': 'Year'})
    .set_index('Year')
)

# Turn each retained sea-level row into a 'YYYY-MM' label: the n-th kept row of
# a year (n = 0..11) is assigned month n + 1. The timestamps are assembled
# column-wise by pd.to_datetime instead of adding a row-by-row Series of
# DateOffset objects, which is slow and triggers a pandas PerformanceWarning.
month_of_year = filtered_data.groupby('Year').cumcount() + 1
month_start = pd.to_datetime(
    pd.DataFrame({
        'year': filtered_data['Year'],
        'month': month_of_year,
        'day': 1,
    })
)
filtered_data['Year'] = month_start.dt.to_period('M').astype(str)

# Ensure there are no duplicate months. The check is made on the month label
# itself (the previous version checked the row index, which cannot detect a
# repeated month).
filtered_data = filtered_data.drop_duplicates(subset='Year', keep='first')

# Use the month label as the index so both frames can be joined on it.
filtered_data = filtered_data.set_index('Year')

# Merge dataframes on 'Year', using an outer join so that months present in
# only one of the two sources are kept (with NaN for the missing side).
merged_data = pd.merge(
    transposed_data,
    filtered_data,
    how='outer',
    left_index=True,
    right_index=True,
)

# Rename the index
merged_data = merged_data.rename_axis('Date')

# Save the merged DataFrame to a new CSV file
merged_data.to_csv('inputs/merged_data.csv')

print(merged_data.head())



         10025.0  60657.0  10023.0  60614.0  79936.0  10002.0  60640.0  \
Date                                                                     
1993-01      NaN      NaN      NaN      NaN      NaN      NaN      NaN   
1993-02      NaN      NaN      NaN      NaN      NaN      NaN      NaN   
1993-03      NaN      NaN      NaN      NaN      NaN      NaN      NaN   
1993-04      NaN      NaN      NaN      NaN      NaN      NaN      NaN   
1993-05      NaN      NaN      NaN      NaN      NaN      NaN      NaN   

         77084.0  94109.0  75070.0  ...  76005.0  81225.0  \
Date                                ...                     
1993-01      NaN      NaN      NaN  ...      NaN      NaN   
1993-02      NaN      NaN      NaN  ...      NaN      NaN   
1993-03      NaN      NaN      NaN  ...      NaN      NaN   
1993-04      NaN      NaN      NaN  ...      NaN      NaN   
1993-05      NaN      NaN      NaN  ...      NaN      NaN   

         TotalWeightedObservations  GMSL_noGIA  StdDe

In [2]:
# Restrict the merged table to the rows labelled 2010-01 through 2017-09.
# Label-based slicing with .loc keeps both end points.
window_start, window_end = '2010-01', '2017-09'
filtered_merged_data = merged_data.loc[window_start:window_end]

# Preview the first rows of the selected window
print(filtered_merged_data.head())

# Persist the selected window as a CSV file
filtered_merged_data.to_csv('inputs/filtered_merged_data.csv')

          10025.0   60657.0  10023.0  60614.0   79936.0  10002.0  60640.0  \
Date                                                                        
2010-01  775000.0  349900.0      NaN      NaN  134700.0      NaN      NaN   
2010-02  725000.0  349000.0      NaN      NaN  130000.0      NaN      NaN   
2010-03  762500.0  339619.5      NaN      NaN  129900.0      NaN      NaN   
2010-04  725000.0  349000.0      NaN      NaN  129900.0      NaN      NaN   
2010-05  732500.0  334500.0      NaN      NaN  131450.0      NaN      NaN   

          77084.0  94109.0   75070.0  ...  76005.0  81225.0  \
Date                                  ...                     
2010-01  119900.0      NaN  191315.0  ...      NaN      NaN   
2010-02  119850.0      NaN  194900.0  ...      NaN      NaN   
2010-03  121950.0      NaN  190995.0  ...      NaN      NaN   
2010-04  120000.0      NaN  195000.0  ...      NaN      NaN   
2010-05  120000.0      NaN  194950.0  ...      NaN      NaN   

         TotalWeig

In [3]:
import pandas as pd

# Reduce filtered_merged_data (built in an earlier cell) to three zip-code
# columns plus the sea-level columns, and export the result.

# Zip-code column labels; they are floats in the merged table's header.
selected_zipcodes = [10025.0, 60657.0, 77084.0]

# Sea-level columns carried along with the zip codes.
additional_columns = [
    "TotalWeightedObservations",
    "GMSL_noGIA", "StdDevGMSL_noGIA", "SmoothedGSML_noGIA",
    "GMSL_GIA", "StdDevGMSL_GIA", "SmoothedGSML_GIA",
    "SmoothedGSML_GIA_sigremoved",
]

selected_columns = [*selected_zipcodes, *additional_columns]

# Select the columns and move the index back into an ordinary column so it is
# written to the CSV even though index=False is used.
filtered_zip = filtered_merged_data.loc[:, selected_columns].reset_index()
filtered_zip.to_csv('inputs/filtered_zip.csv', index=False)
print(filtered_zip)

       Date    10025.0   60657.0   77084.0  TotalWeightedObservations  \
0   2010-01   775000.0  349900.0  119900.0                  336505.31   
1   2010-02   725000.0  349000.0  119850.0                  337393.41   
2   2010-03   762500.0  339619.5  121950.0                  335680.31   
3   2010-04   725000.0  349000.0  120000.0                  335204.00   
4   2010-05   732500.0  334500.0  120000.0                  336418.59   
..      ...        ...       ...       ...                        ...   
88  2017-05  1459000.0  499000.0  189900.0                  336082.81   
89  2017-06  1397500.0  500000.0  189900.0                  334922.41   
90  2017-07  1315000.0  485000.0  189992.5                  333067.69   
91  2017-08  1320000.0  485000.0  189990.0                  331393.41   
92  2017-09  1295000.0  514724.0  189900.0                  333942.19   

    GMSL_noGIA  StdDevGMSL_noGIA  SmoothedGSML_noGIA  GMSL_GIA  \
0         8.30             84.67                9.54     

In [None]:
import pandas as pd

# Inland counterpart of the zip-code export: same column selection applied to
# filtered_merged_data (built in an earlier cell), written to its own CSV.
# NOTE(review): selected_zipcodes_inland is empty, so the exported table holds
# only the index column and the sea-level columns -- fill in the inland zip
# codes before relying on this file.
# NOTE(review): this cell rebinds `additional_columns` and `filtered_zip`.

selected_zipcodes_inland = []

# Sea-level columns carried along with the zip codes.
additional_columns = [
    "TotalWeightedObservations",
    "GMSL_noGIA", "StdDevGMSL_noGIA", "SmoothedGSML_noGIA",
    "GMSL_GIA", "StdDevGMSL_GIA", "SmoothedGSML_GIA",
    "SmoothedGSML_GIA_sigremoved",
]

selected_columns = [*selected_zipcodes_inland, *additional_columns]

# Select the columns and move the index back into an ordinary column so it is
# written to the CSV even though index=False is used.
filtered_zip = filtered_merged_data.loc[:, selected_columns].reset_index()
filtered_zip.to_csv('inputs/filtered_zip_inland.csv', index=False)
print(filtered_zip)