### Train Model with Sport Road Segments 

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder

# Load the data from the CSV file into a DataFrame
data = pd.read_csv('SampledData.csv')

# Filter out rows where 'Sport' column equals 'line'
data_filtered = data[data['Sport'] != 'line']

# Select the categorical columns of interest
categorical_columns = ['Sport', 'Timestamp', 'Weekday?']

# Perform one-hot encoding for all categorical columns
encoder = OneHotEncoder()
encoded_columns = pd.DataFrame(encoder.fit_transform(data_filtered[categorical_columns]).toarray(), index=data_filtered.index)
encoded_columns.columns = encoder.get_feature_names_out(categorical_columns)

# Select the numerical columns
numerical_columns = data_filtered[['Latitude', 'Longitude']]

# Concatenate the encoded categorical columns with the numerical ones
X = pd.concat([numerical_columns, encoded_columns], axis=1)

# Assuming 'delta_cost' is the column you want to predict
y = data_filtered['delta_cost']

# Split the dataset into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the Linear Regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the evaluation metrics
print("Mean Squared Error:", mse)
print("R-squared Score:", r2)

# Print the coefficients and intercept of the model
print("\nModel Coefficients:")
for feature, coef in zip(X.columns, model.coef_):
    print(f"{feature}: {coef}")
print("Intercept:", model.intercept_)

Mean Squared Error: 0.03234144981396423
R-squared Score: 0.0019414801746177046

Model Coefficients:
Latitude: -0.2423170746646312
Longitude: -0.17264737497245283
Sport_Basketball: 94260444.36481883
Sport_Football: 94260444.36425596
Sport_Hockey: 94260444.36277376
Sport_Volleyball: 94260444.36521196
Timestamp_010000PM: 2188619422.3687735
Timestamp_011500PM: 2188619422.371376
Timestamp_013000PM: 2188619422.366519
Timestamp_020000PM: 2188619422.368259
Timestamp_023000PM: 2188619422.36927
Timestamp_030000PM: 2188619422.3675385
Timestamp_033000PM: 2188619422.3652377
Timestamp_040000PM: 2188619422.367567
Timestamp_041500PM: 2188619422.364435
Timestamp_043000PM: 2188619422.3664136
Timestamp_050000PM: 2188619422.3675137
Timestamp_053000PM: 2188619422.369517
Timestamp_060000PM: 2188619422.3683214
Timestamp_063000PM: 2188619422.3658524
Timestamp_070000PM: 2188619422.367324
Timestamp_071500PM: 2188619422.3671327
Timestamp_073000PM: 2188619422.366307
Timestamp_080000PM: 2188619422.365872
Timestamp

### Get delta cost using model

In [10]:
def predict_delta_cost(input_data):
    # Perform one-hot encoding for categorical columns in the input data
    encoded_input = pd.DataFrame(encoder.transform(input_data[categorical_columns]).toarray(), index=input_data.index)
    encoded_input.columns = encoder.get_feature_names_out(categorical_columns)
    
    # Concatenate the encoded categorical columns with the numerical ones
    input_features = pd.concat([input_data[['Latitude', 'Longitude']], encoded_input], axis=1)
    
    # Predict delta cost
    predicted_delta_cost = model.predict(input_features)
    
    return predicted_delta_cost

# Example usage:
input_data = pd.DataFrame({
    'Latitude': [44.93864000000005],  # Example latitude
    'Longitude': [-93.2752849999999],  # Example longitude
    'Sport': ['Football'],  # Example sport
    'Timestamp': ['070000PM'],  # Example timestamp
    'Weekday?': [True]  # Example weekday
})

predicted_cost = predict_delta_cost(input_data)
print("Predicted Delta Cost:", predicted_cost)

Predicted Delta Cost: [0.04201126]


### Put Columns for Model into Dictionary

In [3]:
import pandas as pd

def csv_to_dicts(csv_file):
    df = pd.read_csv(csv_file, usecols=['Sport', 'Timestamp', 'Weekday?'])
    data_dicts = df.to_dict(orient='records')
    return data_dicts

# Example usage:
csv_file = 'AllTrafficCost.csv'  # Replace 'your_csv_file.csv' with the path to your CSV file
result = csv_to_dicts(csv_file)
