In [67]:
import pandas as pd
import numpy as np
from pandas.api.types import infer_dtype
pd.set_option("display.max_rows", 200)
pd.set_option('display.max_colwidth', None)

class FIFA_Processing:
    
    def __init__(self):
        self.sysdata   = ["sofifa_id",  'real_face', 'player_url', 'year']
        self.constants = ['short_name','long_name', 'dob']
        self.variables = ["value_eur", 'nation_position', 'club_name', 'potential', 'wage_eur', 'international_reputation',
                          'release_clause_eur', 'loaned_from', 'weight_kg', 'league_name', 'contract_valid_until', 'player_positions',
                          'player_traits', 'height_cm', 'preferred_foot', 'team_position', 'work_rate', 'joined', 'age', 'body_type',
                          'team_jersey_number', 'player_tags', 'league_rank', 'nation_jersey_number', 'nationality']
        self.abilities = ['overall', 'shooting', 'dribbling', 'mentality_positioning', 'goalkeeping_positioning', 'goalkeeping_diving',
                          'goalkeeping_kicking', 'gk_speed', 'goalkeeping_handling', 'skill_moves', 'skill_ball_control',
                          'defending', 'gk_positioning', 'gk_reflexes', 'attacking_volleys', 'weak_foot', 'power_long_shots',
                          'movement_reactions', 'movement_balance', 'defending_standing_tackle', 'gk_diving', 'movement_acceleration',
                          'attacking_finishing', 'movement_sprint_speed', 'skill_curve', 'mentality_composure', 'skill_dribbling',
                          'mentality_aggression', 'attacking_heading_accuracy', 'gk_handling', 'power_shot_power', 'mentality_vision',
                          'goalkeeping_reflexes', 'passing', 'mentality_interceptions', 'pace', 'skill_long_passing', 'defending_marking',
                          'power_strength', 'defending_sliding_tackle', 'attacking_short_passing', 'physic', 'mentality_penalties',
                          'gk_kicking', 'skill_fk_accuracy', 'power_stamina',  'movement_agility', 'power_jumping', 'attacking_crossing']
        self.positions = ['cf','lb','ls','lm','rs','cb','rf','st','rm','lf','cm','rw','rb','lw',
                          'lcb','cam','rdm','lcm','ldm','rcm','rcb','rwb','lwb','ram','lam','cdm']
        self.semantics = {}
        for i in "sysdata", "constants", "variables", "positions", "abilities":
            for k in eval(f"self.{i}"):
                self.semantics[k] = i
        self.import_data()
        self.process_data()
        
    def import_data(self):
        self.data = {}
        for year in np.arange(15, 22):
            self.data[f"df_{str(year)}"] = {"df": pd.read_csv(f"players_{str(year)}.csv")}
            self.data[f"df_{str(year)}"]["df"]["year"] = int("20" + str(year))
    
    def process_data(self):
        select_cols = list(self.semantics.keys())
        main_df = pd.DataFrame(columns = select_cols)
        for year in np.arange(15, 22):
            df = self.data[f"df_{str(year)}"]["df"][select_cols]
            main_df = pd.concat([main_df, df], axis = 0).reset_index(drop=True)
        meta = self.get_metadata(main_df)
        group_cols  = ["long_name","short_name","player_url","dob","real_face"]
        checking_df = main_df[meta[meta["semantic_type"].isin(["sysdata", "constants"])]["colname"]]
        master      = checking_df.groupby(["sofifa_id"])[group_cols].agg("max")[group_cols].drop_duplicates().reset_index(drop=False)
        master_df   = pd.merge(left = main_df.drop(group_cols, axis = 1), right = master,
                               left_on = "sofifa_id", right_on = "sofifa_id", how = "left")
        self.master_df = master_df[select_cols]
        self.master_df = self.master_df.copy()[~pd.isnull(self.master_df["club_name"])]
        null_replacements = {
            "nation_position" : "NA",
            "nation_jersey_number" : "NA",
            "loaned_from" : "NA",
            "player_traits": "None",
            "player_tags": "None"
        }
        for k, v in list(null_replacements.items()):
            self.master_df[k] = self.master_df[k].apply(lambda x: v if pd.isnull(x) else x)
        self.master_df["has_release_clause"] = self.master_df["release_clause_eur"].apply(lambda x: "No" if pd.isnull(x) else "Yes")
        

    def get_metadata(self, df):
        meta = pd.DataFrame({
            "colname"       : [i for i in df.columns],
            "count"         : [len(df[i]) for i in df.columns],
            "null_count"    : [df[i].isnull().sum() for i in df.columns],
            "semantic_type" : [self.semantics[i] for i in df.columns],
            "data_type"     : [infer_dtype(df[i]) for i in df.columns]
        })
        return meta
    
    def get_data(self, df_type):
        if df_type == "full":
            return self.master_df
        elif df_type == "working":
            return self.master_df[self.sysdata + self.constants + self.variables + ["overall"]]
        
    def get_all_tags(self):
        all_tags = []
        for i in self.master_df["player_tags"]:
            if i is not None:
                all_tags.extend([k.replace("#", "").strip().replace(" ", "_") for k in i.split(",")])
        return list(set(all_tags))

    def get_all_traits(self):
        all_traits = []
        for i in self.master_df["player_traits"]:
            if i is not None:
                all_traits.extend([k.replace("(AI)", "").strip().replace(" ","_").replace("-","") for k in i.split(",")])
        return list(set(all_traits))

In [68]:
FIFA = FIFA_Processing()
df = FIFA.get_data(df_type = "working")

In [69]:
meta = FIFA.get_metadata(df)
meta

Unnamed: 0,colname,count,null_count,semantic_type,data_type
0,sofifa_id,121272,0,sysdata,integer
1,real_face,121272,0,sysdata,string
2,player_url,121272,0,sysdata,string
3,year,121272,0,sysdata,integer
4,short_name,121272,0,constants,string
5,long_name,121272,0,constants,string
6,dob,121272,0,constants,string
7,value_eur,121272,0,variables,integer
8,nation_position,121272,0,variables,string
9,club_name,121272,0,variables,string


In [70]:
all_tags = FIFA.get_all_tags()
all_traits = FIFA.get_all_traits()


print(all_tags)
print(all_traits)

['None', 'Strength', 'Playmaker', 'Engine', 'Dribbler', 'Distance_Shooter', 'Aerial_Threat', 'Acrobat', 'Tackling', 'Complete_Forward', 'FK_Specialist', 'Tactician', 'Speedster', 'Complete_Defender', 'Poacher', 'Complete_Midfielder', 'Clinical_Finisher', 'Crosser']
['Power_Header', 'Puncher', 'Swerve_Pass', 'Target_Forward', 'Comes_For_Crosses', 'Flair', 'Outside_Foot_Shot', 'Technical_Dribbler', 'Playmaker', 'Long_Passer', 'Set_Play_Specialist', 'Team_Player', 'One_Club_Player', 'Saves_with_Feet', 'Beat_Offside_Trap', 'GK_Up_for_Corners', 'Backs_Into_Player', 'Finesse_Shot', '1on1_Rush', 'Dives_Into_Tackles', 'Takes_Finesse_Free_Kicks', 'Speed_Dribbler', 'Rushes_Out_Of_Goal', 'Leadership', 'Cautious_With_Crosses', 'Injury_Free', 'Diver', 'Selfish', 'Through_Ball', 'Solid_Player', 'Avoids_Using_Weaker_Foot', 'Long_Throwin', 'Giant_Throwin', 'Long_Shot_Taker', 'None', 'GK_Long_Throw', 'Early_Crosser', 'Chip_Shot', 'Power_FreeKick', 'Injury_Prone']
