# Week 7 Deliverable: Continuing Neural Network
This week, we will build on the model from Week 6, not just improving model performance through hyperparameter tuning but also saving results in a cleaner and more efficient way. We will also test predicting 6 months ahead rather than 12, so the held-out test set used below covers a six-month window (July–December 2024).

In [1]:
from _Setup import *
from _Functions import *

Installing dependencies from requirements.txt...
All dependencies installed successfully.


In [4]:
# Load the sector-level data and swap the two text labels below for plain
# integers wherever they occur in the frame.
sector_df = (
    pd.read_csv(sector_data_csv_path, low_memory=False)
    .replace(to_replace="2025 (FYTD)", value=2025)
    .replace(to_replace='2024', value=2024)
)

# Build the 'Year-Date' column on sector_df by running the project helper
# over every row, then parse the result as a datetime.
sector_df['Year-Date'] = sector_df.apply(convert_to_fiscal_year_date, axis=1)
sector_df['Year-Date'] = pd.to_datetime(sector_df['Year-Date'], format='%Y-%m-%d')

# Chronological split: rows strictly before the cutoff train the model,
# rows on or after it are held out for testing.
split_cutoff = '2024-07-01'
train_df = sector_df.loc[sector_df['Year-Date'] < split_cutoff]
test_df = sector_df.loc[sector_df['Year-Date'] >= split_cutoff]

# Persist both splits before any columns are dropped.
train_df.to_csv(sector_data_csv_path_train, index=False)
test_df.to_csv(sector_data_csv_path_test, index=False)

# 'Year-Date' is meant to replace the fiscal-year/month columns, and the
# abbreviated columns are not needed, so drop them from both splits.
columns_to_remove = ["Fiscal Year", "Month Grouping", "Month (abbv)", "AOR (Abbv)"]
sector_train_dropped = train_df.drop(columns=columns_to_remove)
sector_test_dropped = test_df.drop(columns=columns_to_remove)


In [5]:
# Total the encounter counts for each (Area of Responsibility, Year-Date) pair.
sector_train_aggregated = (
    sector_train_dropped
    .groupby(["Area of Responsibility", "Year-Date"], as_index=False)
    .agg({"Encounter Count": "sum"})
)

# Reshape to one row per area and one column per date; area/date pairs with
# no data become 0 instead of NaN.
sector_train_pivot = sector_train_aggregated.pivot(
    index="Area of Responsibility",
    columns="Year-Date",
    values="Encounter Count",
).fillna(0)

# Write the wide training table to disk (index kept so areas are preserved).
sector_train_pivot.to_csv(sector_data_csv_path_train_pivoted)

In [6]:
# Total the encounter counts for each (Area of Responsibility, Year-Date) pair
# in the held-out test split.
sector_test_aggregated = (
    sector_test_dropped
    .groupby(["Area of Responsibility", "Year-Date"], as_index=False)
    .agg({"Encounter Count": "sum"})
)

# Reshape to one row per area and one column per date; area/date pairs with
# no data become 0 instead of NaN.
sector_test_pivot = sector_test_aggregated.pivot(
    index="Area of Responsibility",
    columns="Year-Date",
    values="Encounter Count",
).fillna(0)

# Write the wide test table to disk, then preview the first rows.
sector_test_pivot.to_csv(sector_data_csv_path_test_pivoted)
sector_test_pivot.head()

Year-Date,2024-07-01,2024-08-01,2024-09-01,2024-10-01,2024-11-01,2024-12-01
Area of Responsibility,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Atlanta Field Office,915,869,951,667,484,470
Baltimore Field Office,1126,1226,1136,1156,1171,950
Big Bend Sector,214,308,349,291,282,503
Blaine Sector,99,66,63,69,58,34
Boston Field Office,3989,3452,3432,2684,2031,2277


In [None]:
# Fit one MinMaxScaler per row of the training pivot and keep them in a dict
# keyed by the row's index label.
scalers = {}

for idx, row in sector_train_pivot.iterrows():
    scaler = MinMaxScaler()

    # MinMaxScaler scales column-wise, so present the row's values as a
    # single-column 2D array.
    row_array = row.to_numpy().reshape(-1, 1)

    # Only the fitted state is needed here, so call fit() rather than
    # fit_transform(). The previous code bound the transformed array to a
    # global named `tf`, which was never used and would overwrite any `tf`
    # alias provided by the star imports.
    # NOTE(review): confirm whether `_Setup` exports `tf` (e.g. TensorFlow).
    scaler.fit(row_array)

    scalers[idx] = scaler


# Persist the dict of fitted scalers to a pickle file.
with open(scalers_file, "wb") as file:
    pickle.dump(scalers, file)

In [None]:

# Read the pivoted training table back from the CSV written earlier, using
# the area name as the index.
sector_train = pd.read_csv(
    sector_data_csv_path_train_pivoted,
    index_col='Area of Responsibility',
)

# Sanity check: pull one fitted scaler out of the dict and inspect it.
atlanta_scaler_info = scalers['Atlanta Field Office']
print(atlanta_scaler_info)

# Data range seen during fitting.
print(f"Min values: {atlanta_scaler_info.data_min_}")
print(f"Max values: {atlanta_scaler_info.data_max_}")

# Target range the data is mapped onto.
print(f"Feature range: {atlanta_scaler_info.feature_range}")

# Multiplier and offset the fitted scaler exposes for the transformation.
print(f"Scale: {atlanta_scaler_info.scale_}")
print(f"Min: {atlanta_scaler_info.min_}")

In [None]:
# Scale every row of the training table with the scaler stored under that
# row's index label.
#
# The rows are collected first and the DataFrame is built once at the end.
# Pre-allocating an empty DataFrame and filling it row by row leaves every
# column with object dtype, which is awkward for numeric code downstream;
# building from the collected arrays yields float columns instead.
scaled_rows = []
for index, row in sector_train.iterrows():
    scaler = scalers[index]
    # The scaler expects a single-column 2D array; flatten the result back
    # to one value per date column.
    scaled_row = scaler.transform(row.to_numpy().reshape(-1, 1)).flatten()
    scaled_rows.append(scaled_row)

# Same index and column labels as the unscaled table, numeric dtype.
sector_train_scaled = pd.DataFrame(
    scaled_rows,
    index=sector_train.index,
    columns=sector_train.columns,
    dtype=float,
)