### Train Model with Sport Road Segments 

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder

# Load the sampled training data from CSV
data = pd.read_csv('SampledData.csv')

# Drop rows whose 'Sport' column equals 'line'
data_filtered = data[data['Sport'] != 'line']

# Columns that are modelled as categorical predictors.
# Note that 'Timestamp' is one-hot encoded as a label, so every distinct
# timestamp string in the training data becomes its own indicator column.
cat_columns = ['Sport', 'Timestamp', 'Weekday?']

# One-hot encode all categorical columns.
# NOTE(review): with default settings OneHotEncoder raises at transform time
# when it meets a category that was not present during fit, so prediction
# inputs must use Sport/Timestamp/Weekday? values that occur in SampledData.csv.
encoder = OneHotEncoder()
encoded_columns = pd.DataFrame(
    encoder.fit_transform(data_filtered[cat_columns]).toarray(),
    index=data_filtered.index,  # keep row alignment with the filtered frame
)
encoded_columns.columns = encoder.get_feature_names_out(cat_columns)

# Numerical predictors are used as-is
num_columns = data_filtered[['Latitude', 'Longitude']]

# Combine the numerical columns with the encoded categorical ones
X = pd.concat([num_columns, encoded_columns], axis=1)

# The regression target is the delta cost
y = data_filtered['delta_cost']

# Split the dataset into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train, y_train)

# Make predictions on the held-out testing data
y_pred = model.predict(X_test)

# Evaluate the model and report the scores so the fit quality is visible
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean squared error: {mse:.4f}")
print(f"R^2 score: {r2:.4f}")

### Define Input Data

In [8]:
# Single-row scenario frame: one record holding the categorical inputs
# (sport, timestamp string and weekday flag) to predict for.
example_scenario = {
    'Sport': 'Football',
    'Timestamp': '070000PM',
    'Weekday?': True,
}
input_data = pd.DataFrame([example_scenario])

### Put Lat, Long, WKT, and Input into One Dataframe

In [9]:
def csv_to_dicts(csv_file):
    """Read the coordinate and geometry columns of a CSV into row dictionaries.

    Parameters
    ----------
    csv_file : str or file-like
        Anything ``pandas.read_csv`` accepts. The file must contain the
        columns 'Latitude', 'Longitude' and 'geometry_wkt'; other columns
        are ignored.

    Returns
    -------
    list of dict
        One dictionary per CSV row, keyed by the three column names above.
    """
    wanted_columns = ['Latitude', 'Longitude', 'geometry_wkt']
    return pd.read_csv(csv_file, usecols=wanted_columns).to_dict(orient='records')

# Read the base rows (latitude, longitude, WKT geometry) once.
# Both names are kept bound to the same list of row dictionaries.
result = csv_to_dicts('ML_Base_data.csv')
data_dicts = result

# Turn the row dictionaries back into a frame so the combination below can be
# done column-wise instead of building one small DataFrame per CSV row.
base_rows = pd.DataFrame(data_dicts, columns=['Latitude', 'Longitude', 'geometry_wkt'])
n_base = len(base_rows)
n_inputs = len(input_data)

# Tile the input rows once per base row: [in_0..in_k, in_0..in_k, ...].
# With the usual single-row input this simply repeats that row n_base times.
# An empty base file yields an empty frame with the expected columns.
output_data = input_data.iloc[list(range(n_inputs)) * n_base].reset_index(drop=True)

# Repeat every base row once per input row so it lines up with the tiling
# above, then attach (or overwrite) the location columns.
repeated_base = base_rows.loc[base_rows.index.repeat(n_inputs)].reset_index(drop=True)
output_data['Latitude'] = repeated_base['Latitude']
output_data['Longitude'] = repeated_base['Longitude']
output_data['wkt'] = repeated_base['geometry_wkt']

### Predict Delta Cost

In [10]:
def predict_delta_cost(input_data, encoder, model):
    """Predict delta cost for every row of ``input_data``.

    Parameters
    ----------
    input_data : pandas.DataFrame
        Must contain 'Latitude', 'Longitude' and the categorical columns the
        encoder was fitted on.
    encoder : fitted one-hot encoder
        Needs ``transform`` (returning an object with ``toarray``) and
        ``get_feature_names_out``. Its ``feature_names_in_`` attribute, when
        present, decides which columns are encoded; otherwise the
        module-level ``cat_columns`` list is used.
    model : fitted regressor
        Needs ``predict``; it receives the numeric columns followed by the
        encoded columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``input_data`` with an added (or replaced) 'delta_cost'
        column. The frame passed in is left unmodified.
    """
    # Prefer the columns the encoder itself was fitted on over a hidden global
    categorical = getattr(encoder, 'feature_names_in_', None)
    if categorical is None:
        categorical = cat_columns
    categorical = list(categorical)

    # One-hot encode the categorical columns, keeping the caller's row index
    # so the concat below aligns row-for-row
    encoded_input = pd.DataFrame(
        encoder.transform(input_data[categorical]).toarray(),
        index=input_data.index,
        columns=encoder.get_feature_names_out(categorical),
    )

    # Numeric columns first, then the encoded ones
    input_features = pd.concat([input_data[['Latitude', 'Longitude']], encoded_input], axis=1)

    # Predict delta cost
    predicted_delta_cost = model.predict(input_features)

    # Work on a copy so the caller's frame is not changed as a side effect
    scored = input_data.copy()
    scored['delta_cost'] = predicted_delta_cost
    return scored

# Use the combined frame built earlier as the prediction input
input_data = output_data

# Score every row with the fitted encoder and model, then write the result
# to CSV without the index column
output_data_delta = predict_delta_cost(input_data=input_data, encoder=encoder, model=model)
output_data_delta.to_csv(path_or_buf='outputtestdelta.csv', index=False)

### User Selection (DOES NOT WORK)

In [10]:
def get_sport_choice():
    """Prompt until the user enters one of the supported sports.

    The answer is stripped of surrounding whitespace and capitalized before
    it is compared, so 'hockey' and ' HOCKEY ' are both accepted.

    Returns
    -------
    str
        'Football', 'Volleyball', 'Basketball' or 'Hockey'.
    """
    allowed_sports = ('Football', 'Volleyball', 'Basketball', 'Hockey')
    while True:
        raw_answer = input("Enter your preferred sport (Football, Volleyball, Basketball, Hockey): ")
        choice = raw_answer.strip().capitalize()
        if choice not in allowed_sports:
            print("Invalid sport choice. Please choose from Football, Volleyball, Basketball, or Hockey.")
            continue
        return choice

def get_time():
    """Prompt until the user enters a time in HHMMSSAM/PM form.

    The answer is stripped and upper-cased, then must consist of a two-digit
    hour, two-digit minutes and seconds (00-59 each) and an 'AM' or 'PM'
    suffix, e.g. '070000PM'. A length check alone would let strings such as
    '12345678' through.

    Returns
    -------
    str
        The validated, upper-cased time string.
    """
    import re  # local import so the function does not rely on file-level imports

    # Hours 00-12 are accepted; 00 is tolerated because the exact convention
    # of the training data is not visible here -- TODO confirm.
    time_pattern = re.compile(r'(0[0-9]|1[0-2])[0-5][0-9][0-5][0-9](AM|PM)')

    while True:
        time_input = input("Enter the time (format: HHMMSSAM/PM): ").strip().upper()
        if time_pattern.fullmatch(time_input):
            return time_input
        print("Invalid time format. Please enter time in the format HHMMSSAM/PM.")

def get_weekday():
    """Prompt until the user answers True or False to the weekday question.

    The answer is stripped and capitalized before comparison, so 'true' and
    ' FALSE ' are accepted.

    Returns
    -------
    bool
        True when the user answered 'True', False when they answered 'False'.
    """
    answers_to_flags = {'True': True, 'False': False}
    while True:
        answer = input("Is it a weekday? (True/False): ").strip().capitalize()
        if answer in answers_to_flags:
            return answers_to_flags[answer]
        print("Invalid input. Please enter True or False.")

# Ask the user for each selection in turn
sport = get_sport_choice()
time = get_time()
weekday = get_weekday()

# Echo the collected selections, one labelled line each
for label, value in (("Sport:", sport), ("Time:", time), ("Weekday?", weekday)):
    print(label, value)


Enter your preferred sport (Football, Volleyball, Basketball, Hockey): Volleyball
Enter the time (format: HHMMSSAM/PM): 080000PM
Is it a weekday? (True/False): True
Sport: Volleyball
Time: 080000PM
Weekday? True
