## First, I need user input. Provide a player name, the opposing team, and the map being played (if known).
### The code below runs error checking and saves the values given.

In [1]:
import pandas as pd
from difflib import SequenceMatcher

# Relative paths of the scoreboard CSV and the games CSV.
gameScoreboard, games = 'Data/OfficialGame_Scoreboard.csv', 'Data/OfficialGames.csv'

# Read both files into DataFrames: the scoreboard first, then the games.
scoreboard_df, games_df = (
    pd.read_csv(csv_path) for csv_path in (gameScoreboard, games)
)

# The data has no direct way of mapping TeamAbbreviation to team IDs/names, so the
# abbreviation is matched to one of the two team names by string similarity.
def similarity(a, b):
    """Calculate the similarity score between two strings.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The difflib SequenceMatcher ratio of the two strings, a float
        between 0.0 (nothing in common) and 1.0 (identical).
    """
    return SequenceMatcher(None, a, b).ratio()

def find_most_similar_team(abbreviation, team1, team2):
    """Find the team name that is most similar to the abbreviation.

    The comparison ignores letter case and surrounding whitespace, so an
    abbreviation written in a different case than the full name still matches.
    Non-string values (for example the NaN that a left merge leaves behind for
    an unmatched game) are tolerated instead of raising a TypeError.

    Args:
        abbreviation: The abbreviated team name to resolve.
        team1: First candidate team name.
        team2: Second candidate team name.

    Returns:
        team1 or team2, exactly as passed in. team1 wins ties. A candidate
        that is not a string only wins when the other candidate is not a
        string either.
    """
    def _normalize(value):
        # None marks "not usable text" so it can be told apart from "".
        return value.strip().casefold() if isinstance(value, str) else None

    target = _normalize(abbreviation)

    def _score(team):
        candidate = _normalize(team)
        if candidate is None:
            # Rank a missing candidate below any real team name.
            return -1.0
        if target is None:
            # Nothing to compare against: every real name scores the same.
            return 0.0
        return similarity(target, candidate)

    return team1 if _score(team1) >= _score(team2) else team2

#Method to make sure input values are valid.
def checkInputValues(player_name, map_name, oteam_name, scoreboard_data=None, games_data=None):
    """Validate the user-supplied player, map and opposing team.

    The player and the opposing team are required. The map is optional (the
    notebook asks for it only "if known"), so a blank map skips the map lookup
    instead of being rejected.

    Args:
        player_name: Must appear in the 'PlayerName' column of the scoreboard.
        map_name: Blank, or a value from the 'Map' column of the games data.
        oteam_name: Must appear in the 'Team1' or 'Team2' column of the games data.
        scoreboard_data: DataFrame to look players up in; defaults to the
            module-level scoreboard_df.
        games_data: DataFrame to look maps and teams up in; defaults to the
            module-level games_df.

    Returns:
        "All inputs are valid." when every check passes, otherwise a message
        describing the first problem found.
    """
    # Fall back to the notebook-level frames when none are passed explicitly.
    if scoreboard_data is None:
        scoreboard_data = scoreboard_df
    if games_data is None:
        games_data = games_df

    if not player_name.strip():
        return "Player name cannot be empty."
    if not oteam_name.strip():
        return "Opposing team name cannot be empty."

    if player_name not in scoreboard_data['PlayerName'].values:
        return "Player name not found in the dataset."
    # The map is only checked when one was actually provided.
    if map_name.strip() and map_name not in games_data['Map'].values:
        return "Map name not found in the dataset."
    # The opposing team may be listed on either side of a game.
    if oteam_name not in games_data['Team1'].values and oteam_name not in games_data['Team2'].values:
        return "Opposing team name not found in the dataset."

    return "All inputs are valid."

# Get user input. Surrounding whitespace is stripped so that the exact-match
# lookups against the datasets are not broken by a stray space.
player_name = input("Enter player name: ").strip()
map_name = input("Enter map name: ").strip()
oteam_name = input("Enter opposing team name: ").strip()

# Actually run the validation defined above and stop here on bad input,
# because the following cells assume these values have been validated.
validation_message = checkInputValues(player_name, map_name, oteam_name)
if validation_message != "All inputs are valid.":
    raise ValueError(validation_message)



### Next, I need to make a data set based on that information. This data set will be used to make features for the model.

In [2]:
#Method to create the data set for the player
def create_player_dataset(player_name, scoreboard_df, games_df):
    """Build the per-game dataset for a single player.

    Args:
        player_name: Value to match exactly against the 'PlayerName' column.
        scoreboard_df: Scoreboard rows; must contain 'PlayerName', 'GameID'
            and 'TeamAbbreviation'.
        games_df: Game rows; must contain 'GameID', 'Team1' and 'Team2'.

    Returns:
        The player's scoreboard rows left-joined with the game rows on
        'GameID', plus two added columns: 'PlayerTeam' (whichever of
        Team1/Team2 best matches TeamAbbreviation) and 'OpposingTeam'
        (the other one). The result has zero rows when the player is
        not present in the scoreboard.
    """
    # Filter the scoreboard for the player
    targetplayer_data = scoreboard_df[scoreboard_df['PlayerName'] == player_name]

    # Combining that data with the games dataset to get additional information (using GameID)
    combined_data = targetplayer_data.merge(games_df, on='GameID', how='left')

    # Resolve the player's team by finding the team name most similar to the
    # TeamAbbreviation. A plain list is built instead of DataFrame.apply(axis=1)
    # because apply on a frame with no rows returns a DataFrame rather than a
    # Series, which cannot be assigned to a single column.
    combined_data['PlayerTeam'] = [
        find_most_similar_team(abbreviation, team1, team2)
        for abbreviation, team1, team2 in zip(
            combined_data['TeamAbbreviation'],
            combined_data['Team1'],
            combined_data['Team2'],
        )
    ]

    # The opposing team is Team2 when the player is on Team1, otherwise Team1.
    plays_for_team1 = combined_data['PlayerTeam'] == combined_data['Team1']
    combined_data['OpposingTeam'] = combined_data['Team2'].where(
        plays_for_team1, combined_data['Team1'])

    return combined_data

# Build the dataset for the chosen player (the inputs are assumed to be validated by now).
player_dataset = create_player_dataset(
    player_name=player_name,
    scoreboard_df=scoreboard_df,
    games_df=games_df,
)

# Preview the first five rows of the dataset.
print(player_dataset.head(n=5))

   GameID  PlayerID PlayerName TeamAbbreviation Agent    ACS  Kills  Deaths  \
0   60894    8419.0     Reduxx             Boos  jett  313.0   24.0    10.0   
1   60895    8419.0     Reduxx             Boos  skye   94.0    5.0    13.0   
2   60896    8419.0     Reduxx             Boos  jett  339.0   25.0    14.0   
3   60888    8419.0     Reduxx             Boos  jett  302.0   21.0    12.0   
4   60889    8419.0     Reduxx             Boos  jett  176.0   12.0    16.0   

   Assists  PlusMinus  ...  Team2_Eco  Team2_EcoWon  Team2_SemiEco  \
0      3.0       14.0  ...        4.0           0.0            2.0   
1      0.0       -8.0  ...        2.0           2.0            0.0   
2      5.0       11.0  ...        2.0           1.0            2.0   
3      3.0        9.0  ...        2.0           2.0            0.0   
4      3.0       -4.0  ...        3.0           1.0            3.0   

   Team2_SemiEcoWon  Team2_SemiBuy  Team2_SemiBuyWon  Team2_FullBuy  \
0               0.0            4.

### Now I will make new features to put into the model

Ideas for features: Avg KDA, Avg ACS, Win percentage in a map, etc.

In [3]:
# Per-player averages of the raw stats, written back onto every row.
# NOTE(review): player_dataset only holds the rows of one player, so each Avg*
# column is a single constant, and it is computed from all rows including the
# row's own value. Confirm this is intended before relying on these as predictors.
for averaged_column, source_column in (
    ('AvgKills', 'Kills'),
    ('AvgDeaths', 'Deaths'),
    ('AvgAssists', 'Assists'),
    ('AvgACS', 'ACS'),
):
    player_dataset[averaged_column] = (
        player_dataset.groupby('PlayerName')[source_column].transform('mean')
    )

# 1 when the player's team equals the 'Winner' column, otherwise 0.
player_dataset['PlayerWin'] = (
    player_dataset['PlayerTeam'].eq(player_dataset['Winner']).astype(int)
)

# Mean of PlayerWin within each (player, map) group.
player_dataset['MapWinRate'] = (
    player_dataset.groupby(['PlayerName', 'Map'])['PlayerWin'].transform('mean')
)





# Training the Model with the new data set and new features
Will be completed once I finish the features

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import pandas as pd

# NOTE(review): this rebinds `games`, which the first cell uses for the games CSV
# path, and the alias is not read again in this cell. Kept in case a later cell
# depends on it.
games = player_dataset


# 'Kills' is the target variable; the columns listed here are the predictors.
feature_columns = ['AvgKills', 'AvgACS', 'PlayerWin', 'MapWinRate']  # Need to replace with actual features

# Keep only rows where every predictor and the target are present. The left merge
# used to build player_dataset can leave missing values, and LinearRegression
# refuses to fit on NaN.
model_data = player_dataset[feature_columns + ['Kills']].dropna()
X = model_data[feature_columns]
y = model_data['Kills']


# Split the data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Create a linear regression model
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model. The printed number is the mean squared error on the test
# split, not a predicted kill count, so it is labelled as such.
mse = mean_squared_error(y_test, y_pred)
print("Mean squared error (Kills): ", mse)


Kill Prediction:  35.80228637771036


### TODO: Need more data for training, need more features