In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import warnings

In [2]:
import warnings
from pandas.errors import SettingWithCopyWarning
warnings.simplefilter(action='ignore', category=(SettingWithCopyWarning))
warnings.simplefilter(action='ignore', category=(FutureWarning))

In [3]:
# Load the two player tables that get joined on "Name" in the next cell.
pdf = pd.read_csv(filepath_or_buffer="players_db/fm23/platinum22_fm23data.csv")
udf = pd.read_csv(filepath_or_buffer="players_db/fm23/fm23db_processed.csv")

In [4]:
# Inner join: keep only the players whose "Name" appears in both tables.
df = pdf.merge(udf, how='inner', on='Name')

In [5]:
# Per-position columns used throughout the rest of the notebook, in this order.
positions = [
    'DM', 'AML', 'AMC', 'GK', 'MC', 'DC', 'AMR',
    'DL', 'DR', 'ST', 'ML', 'MR', 'WBR', 'WBL',
]

# Keep only the identifying columns plus one column per position.
keep_cols = ["Name", "Best_Pos", *positions]
df = df.loc[:, keep_cols]

In [6]:
import pandas as pd
import statsmodels.api as sm


def fit_position_regressions(ratings, positions):
    """Fit one straight line per (Best_Pos, position) pair with OLS.

    For every distinct ``Best_Pos`` label, the rows carrying that label are
    selected and each column listed in ``positions`` is regressed on the
    column named after the label itself:
    ``rating[pos] = intercept + slope * rating[best_pos]``.

    Parameters
    ----------
    ratings : pandas.DataFrame
        Needs a ``Best_Pos`` column plus one column per entry of ``positions``.
    positions : list of str
        Names of the columns to use as dependent variables.

    Returns
    -------
    list of dict
        One record per usable ``Best_Pos`` label, in order of first
        appearance, with the keys ``Best_Pos``, ``{pos}_intercept`` and
        ``{pos}_slope``. Coefficients are rounded to 2 decimals; they are
        ``None`` when the group has fewer than two rows or the fit did not
        yield exactly two parameters.
    """
    records = []

    # dropna(): a missing Best_Pos cannot name a column and used to raise KeyError.
    for best_pos in ratings['Best_Pos'].dropna().unique():
        if best_pos not in ratings.columns:
            # No column to use as the independent variable -> nothing to fit.
            warnings.warn(
                f"Best_Pos {best_pos!r} has no matching column; skipped",
                stacklevel=2,
            )
            continue

        group = ratings[ratings['Best_Pos'] == best_pos]
        record = {'Best_Pos': best_pos}

        # The regressor is identical for every target position, so the design
        # matrix is built once per group instead of once per position.
        if len(group) < 2:
            design = None  # not enough data to fit a model
        else:
            design = sm.add_constant(group[best_pos])

        for pos in positions:
            intercept = slope = None
            if design is not None:
                params = sm.OLS(group[pos], design).fit().params
                # add_constant does not add a second column when the regressor
                # is itself constant; such a fit has a single parameter and is
                # reported as "no formula".
                if len(params) == 2:
                    intercept, slope = (round(value, 2) for value in params)
            record[f'{pos}_intercept'] = intercept
            record[f'{pos}_slope'] = slope

        records.append(record)

    return records


# One record per Best_Pos, then one row per record.
results = fit_position_regressions(df, positions)
results_df = pd.DataFrame(results)

# Save the results to a CSV file
results_df.to_csv('players_db/fm23/position_regression_formulas.csv', index=False)

print("Regression formulas have been saved to position_regression_formulas.csv")


Regression formulas have been saved to position_regression_formulas.csv


In [7]:
# Sanity check: show the first row (Name, Best_Pos and every position column).
df.iloc[0]

Name        Kevin De Bruyne
Best_Pos                 MC
DM                       12
AML                      14
AMC                      20
GK                        1
MC                       20
DC                        1
AMR                      14
DL                        1
DR                        1
ST                       12
ML                       15
MR                       15
WBR                       1
WBL                       1
Name: 0, dtype: object

In [8]:
# Read back the regression formulas written by the fitting cell.
tf = pd.read_csv(filepath_or_buffer="players_db/fm23/position_regression_formulas.csv")

In [9]:
# Formula row(s) for players whose best position is "DM".
data = tf.loc[tf["Best_Pos"] == "DM"]

In [10]:
# Display the selected formula row(s): one intercept/slope pair per position.
data

Unnamed: 0,Best_Pos,DM_intercept,DM_slope,AML_intercept,AML_slope,AMC_intercept,AMC_slope,GK_intercept,GK_slope,MC_intercept,...,ST_intercept,ST_slope,ML_intercept,ML_slope,MR_intercept,MR_slope,WBR_intercept,WBR_slope,WBL_intercept,WBL_slope
7,DM,0.0,1.0,10.13,-0.43,5.49,-0.03,1.0,-0.0,2.48,...,1.03,0.01,8.08,-0.28,10.04,-0.34,9.56,-0.36,11.69,-0.5


In [11]:
# Evaluate every fitted line of the first row in `data` at one fixed rating in
# the best position: y = slope * x + intercept.
BEST_POS_RATING = 20  # presumably the top of the rating scale -- confirm

for pos in positions:
    slope = data[f'{pos}_slope'].iloc[0]
    intercept = data[f'{pos}_intercept'].iloc[0]

    if pd.isna(slope) or pd.isna(intercept):
        # The fitting cell stores None (blank in the CSV, NaN after reading)
        # when no line could be fitted; int(NaN) would raise ValueError.
        print(pos, None)
        continue

    y = slope * BEST_POS_RATING + intercept
    # int() truncates toward zero, it does not round to the nearest integer.
    print(pos, int(y))

DM 20
AML 1
AMC 4
GK 1
MC 17
DC 8
AMR 1
DL 1
DR 3
ST 1
ML 2
MR 3
WBR 2
WBL 1
