<a href="https://colab.research.google.com/github/WRudraksh/Projects/blob/main/FPL_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Read the exported player table from disk (path is relative to the working directory).
df = pd.read_csv('final_output.csv')

# Overwrite the 'element' column with a 1-based running row number,
# so that every row carries a distinct integer ID.
df["element"] = range(1, df.shape[0] + 1)

# Preview the top of the frame.
print(df.head())


   element        full_name   season  team position  total_points  \
0        1    Armando Broja  2022-23     6      FWD            21   
1        2     Fabian Schär  2022-23    15      DEF           139   
2        3      Jonny Evans  2022-23    10      DEF            20   
3        4  Brennan Johnson  2022-23    16      FWD           122   
4        5  Cheick Doucouré  2022-23     7      MID            74   

   goals_scored  assists  clean_sheets  saves  ...  opponent_difficulty  \
0             1        1             1      0  ...                  0.0   
1             1        3            15      0  ...                  0.0   
2             0        0             2      0  ...                  0.0   
3             8        5             7      0  ...                  0.0   
4             0        3             7      0  ...                  0.0   

   fantasy_points  avg_points_last_3  avg_goals_last_3  avg_assists_last_3  \
0             9.0           0.080925               1.0  

In [None]:
# Collapse the table to one row per player name.
# The aggregation spec is assembled group by group; key order below is the
# column order of the resulting frame.
df_agg = (
    df.groupby("full_name")
    .agg({
        # Position / team codes: keep the first value seen for the player.
        **dict.fromkeys(
            ["position_encoded", "team_encoded", "current_team_encoded"],
            "first",
        ),
        # Price: averaged over the player's rows.
        "now_cost": "mean",
        # Performance and discipline counters: totalled over the player's rows.
        **dict.fromkeys(
            [
                "fantasy_points", "total_points", "goals_scored", "assists",
                "clean_sheets", "saves", "minutes", "expected_goals",
                "expected_assists", "penalties_saved", "penalties_missed",
                "yellow_cards", "red_cards", "own_goals", "bonus",
            ],
            "sum",
        ),
        # Recent-form and fixture-difficulty metrics: averaged over the player's rows.
        **dict.fromkeys(
            [
                "form", "form_variability", "avg_goals_last_3",
                "avg_assists_last_3", "avg_points_last_3",
                "opponent_difficulty", "home_difficulty",
            ],
            "mean",
        ),
    })
    .reset_index()  # turn the 'full_name' group key back into a regular column
)


In [None]:
import pandas as pd

# `df_agg` is the per-player frame built by the groupby/agg cell.
# Its aggregation spec does not carry an 'element' column, so this adds one:
# a 1-based sequential ID per aggregated row.
df_agg["element"] = range(1, df_agg.shape[0] + 1)

# Preview the top of the aggregated frame.
print(df_agg.head())


            full_name  position_encoded  team_encoded  current_team_encoded  \
0     Aaron Cresswell               0.0          10.0                  18.0   
1        Aaron Hickey               0.0          14.0                   3.0   
2      Aaron Ramsdale               2.0           0.0                  16.0   
3   Aaron Wan-Bissaka               0.0           5.0                  18.0   
4  Abdoulaye Doucouré               3.0          18.0                   7.0   

   now_cost  fantasy_points  total_points  goals_scored  assists  \
0      39.0           754.0          2146             0       29   
1      43.0           744.0          1953             0       93   
2      44.0          2728.0          4433             0        0   
3      46.0           928.0          1943             0        0   
4      51.0           840.0          2280           150       60   

   clean_sheets  ...  own_goals  bonus      form  form_variability  \
0           174  ...          0    232  0.1416

In [None]:
## import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Point `df` at the aggregated per-player frame (same object, not a copy).
df = df_agg

# Note: `now_cost` is neither scaled nor used as a model input in this cell.

# Prediction target and the input columns used to predict it.
target = 'fantasy_points'
features = [
    'minutes',
    'goals_scored',
    'assists',
    'clean_sheets',
    'saves',
    'expected_goals',
    'expected_assists',
    'bonus',
    'form',
    'opponent_difficulty',
    'home_difficulty',
    'form_variability',
    'position_encoded',
    'team_encoded',
]

# Hold out 20% of the rows for evaluation; the split is seeded so it repeats.
X_train, X_test, y_train, y_test = train_test_split(
    df[features],
    df[target],
    test_size=0.2,
    random_state=42,
)

# Fit a gradient-boosted regressor: 100 trees, depth 5, learning rate 0.1.
xgb_model = xgb.XGBRegressor(max_depth=5, learning_rate=0.1, n_estimators=100)
xgb_model.fit(X_train, y_train)

# Predict the held-out rows and report the mean absolute error.
y_pred = xgb_model.predict(X_test)
print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}")


Mean Absolute Error: 149.40038714408874


In [None]:
# Decode table from integer position code to label:
# 0 -> DEF, 1 -> FWD, 2 -> GK, 3 -> MID.
position_mapping = dict(enumerate(["DEF", "FWD", "GK", "MID"]))

# Add a human-readable 'position' column derived from 'position_encoded'.
# The column is written in place on `df`.
df["position"] = df["position_encoded"].map(position_mapping)

# Spot-check the code/label pairs on the first rows.
print(df[["position_encoded", "position"]].head())


   position_encoded position
0               0.0      DEF
1               0.0      DEF
2               2.0       GK
3               0.0      DEF
4               3.0      MID


In [None]:
import pandas as pd

def _prompt_for_fixture_teams():
    """Ask on stdin for the fixtures and return the encoded team IDs as a flat list."""
    num_matches = int(input("Enter the number of matches: "))
    team_ids = []
    for i in range(1, num_matches + 1):
        team_ids.append(int(input(f"Enter home team (encoded) for match {i}: ")))
        team_ids.append(int(input(f"Enter away team (encoded) for match {i}: ")))
    return team_ids


def select_best_fpl_team(df, budget_limit=1000, fixtures=None):
    """
    Greedily pick up to 15 players from the teams involved in the given fixtures.

    Players are ranked by efficiency (``fantasy_points / now_cost``), ties broken
    by ``fantasy_points``. The squad is filled position by position
    (2 GK, 5 DEF, 5 MID, 3 FWD) in ranking order, skipping any player whose cost
    would push the running total above ``budget_limit``.

    If a position quota cannot be met, the remaining slots are filled with the
    best-ranked affordable players of ANY position, so the final squad is not
    guaranteed to match the 2/5/5/3 shape, nor to contain 15 players.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``full_name``, ``position_encoded``,
        ``team_encoded``, ``now_cost`` and ``fantasy_points``. It is not modified.
    budget_limit : number, default 1000
        Maximum total ``now_cost`` of the squad, in the same units as ``now_cost``.
    fixtures : iterable of (home_team, away_team) pairs, optional
        Encoded team IDs of the matches to consider. When omitted (``None``),
        the fixtures are requested interactively through ``input()``.

    Returns
    -------
    pandas.DataFrame
        The selected players, in selection order, with an extra ``efficiency``
        column. Empty (but with all columns present) when nobody can be selected.
    """
    if fixtures is None:
        team_ids = _prompt_for_fixture_teams()
    else:
        team_ids = [team for match in fixtures for team in match]

    # Build the candidate pool as a new frame: `.assign` avoids writing into a
    # slice of `df` (which raises SettingWithCopyWarning) and leaves `df` untouched.
    filtered_df = (
        df[df["team_encoded"].isin(team_ids)]
        .assign(efficiency=lambda pool: pool["fantasy_points"] / pool["now_cost"])
        .sort_values(by=["efficiency", "fantasy_points"], ascending=[False, False])
    )

    # position code -> number of players wanted (GK: 2, DEF: 5, MID: 5, FWD: 3)
    squad_requirements = {2: 2, 0: 5, 3: 5, 1: 3}
    squad_size = sum(squad_requirements.values())
    selected_team = []
    budget_used = 0

    # First pass: fill each position quota in ranking order, within budget.
    for position, count in squad_requirements.items():
        position_players = filtered_df[filtered_df["position_encoded"] == position]
        picked = 0
        for _, player in position_players.iterrows():
            if picked == count:
                break  # quota for this position is met
            if budget_used + player["now_cost"] <= budget_limit:
                selected_team.append(player)
                budget_used += player["now_cost"]
                picked += 1

    # Second pass: top up with the best-ranked affordable players regardless of
    # position until the squad is full or nobody else fits the budget.
    while len(selected_team) < squad_size:
        remaining_budget = budget_limit - budget_used
        chosen_names = [p["full_name"] for p in selected_team]
        available_players = filtered_df[
            ~filtered_df["full_name"].isin(chosen_names)
            & (filtered_df["now_cost"] <= remaining_budget)
        ]
        if available_players.empty:
            break  # No more players can be added
        best_extra_player = available_players.iloc[0]
        selected_team.append(best_extra_player)
        budget_used += best_extra_player["now_cost"]

    # An empty list would give a column-less frame and break the column
    # selection below, so fall back to an empty slice that keeps the columns.
    if selected_team:
        selected_team_df = pd.DataFrame(selected_team)
    else:
        selected_team_df = filtered_df.iloc[0:0].copy()

    # Display output
    print("\n🔹 Best FPL Team for Given Matches")
    print(selected_team_df[["full_name", "position_encoded", "team_encoded", "now_cost", "fantasy_points"]])
    print(f"\n💰 Total Budget Used: {budget_used:.1f} / {budget_limit} credits")
    print(f"✅ Total Players Selected: {len(selected_team_df)} / {squad_size}")

    return selected_team_df

# Build the squad interactively: the call prompts on stdin for the fixtures.
# budget_limit is passed explicitly; 1000 is also the function's default.
best_team = select_best_fpl_team(df, budget_limit=1000)


Enter the number of matches: 10
Enter home team (encoded) for match 1: 0
Enter away team (encoded) for match 1: 8
Enter home team (encoded) for match 2: 19
Enter away team (encoded) for match 2: 18
Enter home team (encoded) for match 3: 15
Enter away team (encoded) for match 3: 13
Enter home team (encoded) for match 4: 2
Enter away team (encoded) for match 4: 9
Enter home team (encoded) for match 5: 4
Enter away team (encoded) for match 5: 1
Enter home team (encoded) for match 6: 12
Enter away team (encoded) for match 6: 10
Enter home team (encoded) for match 7: 16
Enter away team (encoded) for match 7: 6
Enter home team (encoded) for match 8: 14
Enter away team (encoded) for match 8: 3
Enter home team (encoded) for match 9: 5
Enter away team (encoded) for match 9: 17
Enter home team (encoded) for match 10: 11
Enter away team (encoded) for match 10: 7

🔹 Best FPL Team for Given Matches
                    full_name  position_encoded  team_encoded  now_cost  \
205       José Malheiro de

In [None]:
!pip install streamlit
!streamlit run app.py & npx localtunnel --port 8501


Collecting streamlit
  Downloading streamlit-1.43.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.43.2-py2.py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m55.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd

st.title("My Data Analysis App")

# Load your CSV
df = pd.read_csv("final_output.csv")
st.write(df)
