## Kombinasi 3 :
- Delete duplicate
- Impute missing value using iterative imputer
- Outlier capping with winsorization
- Encoding
- Standard Scaler
- Feature selection with Decision Tree

In [37]:
# import library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [38]:
df = pd.read_csv("../UFC_train.csv")

df.head()

Unnamed: 0,R_fighter,B_fighter,Referee,date,location,title_bout,weight_class,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,...,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,Winner
0,Joe Riggs,Joe Doerksen,Steve Mazzagatti,2004-08-21,"Las Vegas, Nevada, USA",False,Middleweight,,,,...,0,0,0,Southpaw,182.88,177.8,185.0,26.0,21.0,Red
1,Jorge Masvidal,Al Iaquinta,Keith Peterson,2015-04-04,"Fairfax, Virginia, USA",False,Lightweight,1.15625,0.0,0.394141,...,0,1,0,Orthodox,180.34,187.96,170.0,27.0,30.0,Blue
2,Dan Stittgen,Stephen Thompson,Josh Rosenthal,2012-02-04,"Las Vegas, Nevada, USA",False,Welterweight,,,,...,0,0,0,Orthodox,185.42,,170.0,28.0,31.0,Blue
3,Josh Koscheck,Johny Hendricks,Kevin Mulhall,2012-05-05,"East Rutherford, New Jersey, USA",False,Welterweight,0.695312,0.0,0.783359,...,6,3,0,Orthodox,177.8,185.42,170.0,28.0,34.0,Blue
4,John Dodson,Manvel Gamburyan,James Warring,2016-04-16,"Tampa, Florida, USA",False,Bantamweight,0.5,0.266602,0.381462,...,3,0,1,Orthodox,160.02,167.64,135.0,34.0,31.0,Red


In [39]:
# Delete duplicate
print(f"Shape before dropping duplicate : {df.shape}")
df.drop_duplicates(inplace=True)
print(f"Shape after dropping duplicate : {df.shape}")

Shape before dropping duplicate : (5410, 144)
Shape after dropping duplicate : (5410, 144)


In [40]:
def check_null(df):
    col_na = df.isnull().sum().sort_values(ascending=True)
    percent = col_na / len(df)
    missing_data = pd.concat([col_na, percent], axis=1, keys=['Total', 'Percent'])

    if (missing_data[missing_data['Total'] > 0].shape[0] == 0):
        print("Tidak ditemukan missing value pada dataset")
    else:
        print(missing_data[missing_data['Total'] > 0])

In [41]:
check_null(df)

                         Total   Percent
R_Weight_lbs                 2  0.000370
R_Height_cms                 4  0.000739
B_Weight_lbs                 8  0.001479
B_Height_cms                10  0.001848
R_Stance                    27  0.004991
...                        ...       ...
B_avg_GROUND_landed       1293  0.239002
B_avg_GROUND_att          1293  0.239002
B_avg_opp_CLINCH_landed   1293  0.239002
B_avg_TD_att              1293  0.239002
B_avg_HEAD_landed         1293  0.239002

[109 rows x 2 columns]


In [42]:
# Split string and number
# Split dataframe into object type and number type
df_object = df.select_dtypes(include='object')
df_number = df.select_dtypes(include=np.number)

In [43]:
# Impute missing value using iterative imputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.tree import ExtraTreeRegressor

imputer = IterativeImputer(initial_strategy='median', random_state=42, estimator=ExtraTreeRegressor(random_state=42), max_iter=15)
df_imputed_number = imputer.fit_transform(df_number)
df_imputed_number = pd.DataFrame(df_imputed_number, columns=df_number.columns)



In [44]:
# Drop R_fighter, B_fighter, Referee, location, date
df_object = df_object.drop(['R_fighter', 'B_fighter', 'Referee', 'location', 'date'], axis=1)

In [45]:
# Combine df_object and df_imputed_number
df = pd.concat([df_object, df_imputed_number], axis=1)
df.head()

Unnamed: 0,weight_class,B_Stance,R_Stance,Winner,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,...,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,Middleweight,Orthodox,Southpaw,Red,0.0,0.0,0.43,0.44375,1.0,0.165,...,0.0,0.0,0.0,0.0,0.0,182.88,177.8,185.0,26.0,21.0
1,Lightweight,Orthodox,Orthodox,Blue,1.15625,0.0,0.394141,0.352422,0.239219,0.011484,...,0.0,4.0,0.0,1.0,0.0,180.34,187.96,170.0,27.0,30.0
2,Welterweight,Orthodox,Orthodox,Blue,0.5,0.0,0.4525,0.313125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,185.42,185.42,170.0,28.0,31.0
3,Welterweight,Southpaw,Orthodox,Blue,0.695312,0.0,0.783359,0.185547,0.088281,0.104375,...,1.0,5.0,6.0,3.0,0.0,177.8,185.42,170.0,28.0,34.0
4,Bantamweight,Orthodox,Orthodox,Red,0.5,0.266602,0.381462,0.456558,0.429614,0.46957,...,0.0,2.0,3.0,0.0,1.0,160.02,167.64,135.0,34.0,31.0


In [46]:
check_null(df)

          Total   Percent
R_Stance     27  0.004991
B_Stance     64  0.011830


In [47]:
# Impute missing R_stance and B_stance with mode
df['R_Stance'].fillna(df['R_Stance'].mode()[0], inplace=True)
df['B_Stance'].fillna(df['B_Stance'].mode()[0], inplace=True)

In [48]:
check_null(df)

Tidak ditemukan missing value pada dataset


In [49]:
def check_outlier(df):
    Q1 = df.quantile(0.25)
    Q3 = df.quantile(0.75)

    # Menghitung RUB dan RLB.
    IQR = Q3 - Q1
    lower_limit = Q1 - 1.5*IQR
    upper_limit = Q3 + 1.5*IQR

    # Menampilkan banyaknya outlier pada atribut.
    outliers = (df < lower_limit) | (df > upper_limit)
    print ("Outlier pada tiap atribut:")
    print(outliers.sum())

    return outliers

In [50]:
# Split string and number
# Split dataframe into object type and number type
df_object = df.select_dtypes(include='object')
df_number = df.select_dtypes(include=np.number)

In [51]:
outliers = dict(check_outlier(df_number).sum())
print("\n\npercentage of outliers in each column:")
for key in outliers.keys():
    print(f"{key} = {outliers[key]/df_number[key].shape[0] * 100}%")

Outlier pada tiap atribut:
B_avg_KD                 476
B_avg_opp_KD             646
B_avg_SIG_STR_pct        171
B_avg_opp_SIG_STR_pct    143
B_avg_TD_pct               0
                        ... 
R_Height_cms              19
R_Reach_cms               85
R_Weight_lbs             301
B_age                     31
R_age                     75
Length: 134, dtype: int64


percentage of outliers in each column:
B_avg_KD = 8.79852125693161%
B_avg_opp_KD = 11.940850277264325%
B_avg_SIG_STR_pct = 3.1608133086876156%
B_avg_opp_SIG_STR_pct = 2.6432532347504623%
B_avg_TD_pct = 0.0%
B_avg_opp_TD_pct = 0.0%
B_avg_SUB_ATT = 6.506469500924214%
B_avg_opp_SUB_ATT = 6.691312384473198%
B_avg_REV = 17.689463955637706%
B_avg_opp_REV = 17.13493530499076%
B_avg_SIG_STR_att = 3.3086876155268024%
B_avg_SIG_STR_landed = 2.7356746765249538%
B_avg_opp_SIG_STR_att = 3.6229205175600745%
B_avg_opp_SIG_STR_landed = 2.9944547134935307%
B_avg_TOTAL_STR_att = 1.77449168207024%
B_avg_TOTAL_STR_landed = 1.7190388170055

In [52]:
from scipy.stats.mstats import winsorize

from scipy.stats.mstats import winsorize

# Identify columns with more than 0.5% outliers
outlier_columns = []
for key in outliers.keys():
    if outliers[key] / df_number[key].shape[0] > 0.005:
        outlier_columns.append(key)

# Winsorize the identified columns
for column in outlier_columns:
    df_number[column] = winsorize(df_number[column], limits=(0, 0.1))

# Combine df_object and df_number
df = pd.concat([df_object, df_number], axis=1)
df.head()

Unnamed: 0,weight_class,B_Stance,R_Stance,Winner,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,...,R_win_by_Decision_Split,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,Middleweight,Orthodox,Southpaw,Red,0.0,0.0,0.43,0.44375,1.0,0.165,...,0.0,0.0,0.0,0.0,0.0,182.88,177.8,185.0,26.0,21.0
1,Lightweight,Orthodox,Orthodox,Blue,0.75,0.0,0.394141,0.352422,0.239219,0.011484,...,0.0,4.0,0.0,1.0,0.0,180.34,187.96,170.0,27.0,30.0
2,Welterweight,Orthodox,Orthodox,Blue,0.5,0.0,0.4525,0.313125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,185.42,185.42,170.0,28.0,31.0
3,Welterweight,Southpaw,Orthodox,Blue,0.695312,0.0,0.605,0.185547,0.088281,0.104375,...,1.0,4.0,4.0,2.0,0.0,177.8,185.42,170.0,28.0,34.0
4,Bantamweight,Orthodox,Orthodox,Red,0.5,0.266602,0.381462,0.456558,0.429614,0.46957,...,0.0,2.0,3.0,0.0,0.0,160.02,167.64,135.0,34.0,31.0


In [53]:
outliers = dict(check_outlier(df_number).sum())
print("\n\npercentage of outliers in each column:")
for key in outliers.keys():
    print(f"{key} = {outliers[key]/df_number[key].shape[0] * 100}%")

Outlier pada tiap atribut:
B_avg_KD                   0
B_avg_opp_KD             646
B_avg_SIG_STR_pct         74
B_avg_opp_SIG_STR_pct     54
B_avg_TD_pct               0
                        ... 
R_Height_cms              19
R_Reach_cms               35
R_Weight_lbs               0
B_age                      0
R_age                      5
Length: 134, dtype: int64


percentage of outliers in each column:
B_avg_KD = 0.0%
B_avg_opp_KD = 11.940850277264325%
B_avg_SIG_STR_pct = 1.3678373382624769%
B_avg_opp_SIG_STR_pct = 0.9981515711645103%
B_avg_TD_pct = 0.0%
B_avg_opp_TD_pct = 0.0%
B_avg_SUB_ATT = 0.0%
B_avg_opp_SUB_ATT = 0.0%
B_avg_REV = 17.689463955637706%
B_avg_opp_REV = 17.13493530499076%
B_avg_SIG_STR_att = 0.0%
B_avg_SIG_STR_landed = 0.0%
B_avg_opp_SIG_STR_att = 0.0%
B_avg_opp_SIG_STR_landed = 0.0%
B_avg_TOTAL_STR_att = 0.0%
B_avg_TOTAL_STR_landed = 0.0%
B_avg_opp_TOTAL_STR_att = 0.0%
B_avg_opp_TOTAL_STR_landed = 0.0%
B_avg_TD_att = 0.0%
B_avg_TD_landed = 0.0%
B_avg_opp_TD_att

In [54]:
df['gender'] = df['weight_class'].apply(lambda x: 'women' if 'women' in x.lower() else 'male')

  df['gender'] = df['weight_class'].apply(lambda x: 'women' if 'women' in x.lower() else 'male')


In [55]:
df.head()

Unnamed: 0,weight_class,B_Stance,R_Stance,Winner,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,...,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,gender
0,Middleweight,Orthodox,Southpaw,Red,0.0,0.0,0.43,0.44375,1.0,0.165,...,0.0,0.0,0.0,0.0,182.88,177.8,185.0,26.0,21.0,male
1,Lightweight,Orthodox,Orthodox,Blue,0.75,0.0,0.394141,0.352422,0.239219,0.011484,...,4.0,0.0,1.0,0.0,180.34,187.96,170.0,27.0,30.0,male
2,Welterweight,Orthodox,Orthodox,Blue,0.5,0.0,0.4525,0.313125,0.0,0.0,...,0.0,0.0,0.0,0.0,185.42,185.42,170.0,28.0,31.0,male
3,Welterweight,Southpaw,Orthodox,Blue,0.695312,0.0,0.605,0.185547,0.088281,0.104375,...,4.0,4.0,2.0,0.0,177.8,185.42,170.0,28.0,34.0,male
4,Bantamweight,Orthodox,Orthodox,Red,0.5,0.266602,0.381462,0.456558,0.429614,0.46957,...,2.0,3.0,0.0,0.0,160.02,167.64,135.0,34.0,31.0,male


In [56]:
df['weight_class'] = df['weight_class'].str.replace('Women', '')

In [57]:
# Get all weight_class values
weight_class = df['weight_class'].unique()
weight_class

array(['Middleweight', 'Lightweight', 'Welterweight', 'Bantamweight',
       'Flyweight', 'LightHeavyweight', 'Strawweight', 'Featherweight',
       'OpenWeight', 'Heavyweight', 'CatchWeight'], dtype=object)

In [58]:
# Create a dictionary to map weight_class values to numbers
weight_class_dict = {
    'CatchWeight' : 0,
    'Strawweight' : 1,
    'Flyweight' : 2,
    'Bantamweight' : 3,
    'Featherweight' : 4,
    'Lightweight' : 5,
    'Welterweight' : 6,
    'Middleweight' : 7,
    'LightHeavyweight' : 8,
    'Heavyweight' : 9,
    'OpenWeight' : 10,
}

gender_dict = {
    'male' : 1,
    'women' : 0
}

In [59]:
# Map each weight_class value to the correct number
df['weight_class'] = df['weight_class'].map(weight_class_dict)

In [60]:
# Map each gender value to the correct number
df['gender'] = df['gender'].map(gender_dict)

In [61]:
df.head()

Unnamed: 0,weight_class,B_Stance,R_Stance,Winner,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,...,R_win_by_Decision_Unanimous,R_win_by_KO/TKO,R_win_by_Submission,R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age,gender
0,7,Orthodox,Southpaw,Red,0.0,0.0,0.43,0.44375,1.0,0.165,...,0.0,0.0,0.0,0.0,182.88,177.8,185.0,26.0,21.0,1
1,5,Orthodox,Orthodox,Blue,0.75,0.0,0.394141,0.352422,0.239219,0.011484,...,4.0,0.0,1.0,0.0,180.34,187.96,170.0,27.0,30.0,1
2,6,Orthodox,Orthodox,Blue,0.5,0.0,0.4525,0.313125,0.0,0.0,...,0.0,0.0,0.0,0.0,185.42,185.42,170.0,28.0,31.0,1
3,6,Southpaw,Orthodox,Blue,0.695312,0.0,0.605,0.185547,0.088281,0.104375,...,4.0,4.0,2.0,0.0,177.8,185.42,170.0,28.0,34.0,1
4,3,Orthodox,Orthodox,Red,0.5,0.266602,0.381462,0.456558,0.429614,0.46957,...,2.0,3.0,0.0,0.0,160.02,167.64,135.0,34.0,31.0,1


In [62]:
# Get the remaining object columns
df_object = df.select_dtypes(include='object')

# Perform one-hot encoding
df_encoded = pd.get_dummies(df_object)

# Combine the encoded columns with the original dataframe
df = pd.concat([df, df_encoded], axis=1)

# Drop the original object columns
df.drop(df_object.columns, axis=1, inplace=True)

df.head()


Unnamed: 0,weight_class,B_avg_KD,B_avg_opp_KD,B_avg_SIG_STR_pct,B_avg_opp_SIG_STR_pct,B_avg_TD_pct,B_avg_opp_TD_pct,B_avg_SUB_ATT,B_avg_opp_SUB_ATT,B_avg_REV,...,B_Stance_Southpaw,B_Stance_Switch,R_Stance_Open Stance,R_Stance_Orthodox,R_Stance_Sideways,R_Stance_Southpaw,R_Stance_Switch,Winner_Blue,Winner_Draw,Winner_Red
0,7,0.0,0.0,0.43,0.44375,1.0,0.165,0.0,1.0,0.125,...,0,0,0,0,0,1,0,0,0,1
1,5,0.75,0.0,0.394141,0.352422,0.239219,0.011484,0.15625,0.132812,0.0,...,0,0,0,1,0,0,0,1,0,0
2,6,0.5,0.0,0.4525,0.313125,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,1,0,0
3,6,0.695312,0.0,0.605,0.185547,0.088281,0.104375,0.09375,0.0625,0.0,...,1,0,0,1,0,0,0,1,0,0
4,3,0.5,0.266602,0.381462,0.456558,0.429614,0.46957,1.37793,0.000244,0.125,...,0,0,0,1,0,0,0,0,0,1


In [63]:
# df.to_csv('../Without Feature Selection/UFC_kombinasi23_wt_featureselection.csv', index=False)

In [64]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

# Feature selection with Lasso
X = df.drop('B_Reach_cms', axis=1)
y = df['B_Reach_cms']

# Create the KNN model
lasso = Lasso(random_state=42)

lasso.fit(X, y)

# Get features with non-zero weights
non_zero_features = np.where(lasso.coef_ != 0)[0]

# Display the feature indices with non-zero weights
print("Features with non-zero weights:", non_zero_features)

Features with non-zero weights: [ 16  17  18  23  47  48  49  64  65  80 102 112 113 114 130 131]


In [65]:
# Define the parameter grid
param_grid = {
    'alpha': [0.1, 0.5, 1.0, 2.0, 5.0],
    'fit_intercept': [True, False],
    'precompute': [True, False],
    'max_iter': [1000, 2000, 5000],
    'tol': [0.0001, 0.001, 0.01],
    'selection': ['cyclic', 'random']
}

# Create the GridSearchCV object
grid_search = GridSearchCV(lasso, param_grid, cv=5, scoring='r2', n_jobs=-1)

# Fit the data to perform grid search
grid_search.fit(X, y)

# Print the best parameters and best score
print("Best Parameters:", grid_search.best_params_)
print("Best R-squared Score:", grid_search.best_score_)

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinat

Best Parameters: {'alpha': 0.1, 'fit_intercept': True, 'max_iter': 1000, 'precompute': True, 'selection': 'random', 'tol': 0.01}
Best R-squared Score: 0.7887637237292279


In [71]:
# Feature importance
feature_importance = grid_search.best_estimator_.coef_
print(feature_importance)

# Get the indices of the feature importance
feature_importance_indices = np.argsort(feature_importance)

[ 5.09798613e-01  0.00000000e+00  0.00000000e+00 -0.00000000e+00
 -0.00000000e+00  0.00000000e+00 -0.00000000e+00  0.00000000e+00
 -0.00000000e+00  0.00000000e+00 -0.00000000e+00 -5.05043753e-04
 -6.54770723e-03  2.01795180e-02  2.87614520e-02  4.01626450e-05
 -1.31545200e-02 -4.38123826e-03 -1.07957858e-02  0.00000000e+00
  0.00000000e+00 -0.00000000e+00 -0.00000000e+00  7.97201319e-03
 -2.29664304e-03 -2.55797434e-02 -0.00000000e+00  1.90102529e-02
 -0.00000000e+00 -4.22912934e-02 -0.00000000e+00 -0.00000000e+00
 -3.51590337e-02  1.01772183e-02  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.69459915e-02 -2.41832216e-02
  0.00000000e+00 -2.19317943e-02 -0.00000000e+00  3.77044792e-02
 -5.36068091e-02  1.54909550e-02  0.00000000e+00 -2.01052285e-04
 -1.17238656e-03  1.64984648e-03  9.78995472e-03 -0.00000000e+00
  0.00000000e+00 -0.00000000e+00 -0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00 -0.00000

In [72]:
print(len(feature_importance_indices))

148


In [73]:
non_zero_features = np.where(feature_importance != 0)[0]

non_zero_features_indices = np.argsort(feature_importance[non_zero_features])

print(len(non_zero_features_indices))

58


In [74]:
# df_feature_selection is df that has column with non-zero weight
df_feature_selection = df.iloc[:, non_zero_features]

df_feature_selection.head()

Unnamed: 0,weight_class,B_avg_SIG_STR_att,B_avg_SIG_STR_landed,B_avg_opp_SIG_STR_att,B_avg_opp_SIG_STR_landed,B_avg_TOTAL_STR_att,B_avg_TOTAL_STR_landed,B_avg_opp_TOTAL_STR_att,B_avg_opp_TOTAL_STR_landed,B_avg_HEAD_att,...,R_avg_opp_GROUND_landed,R_avg_CTRL_time(seconds),R_avg_opp_CTRL_time(seconds),R_total_time_fought(seconds),R_win_by_TKO_Doctor_Stoppage,R_Height_cms,R_Reach_cms,R_Weight_lbs,B_age,R_age
0,7,33.5,11.0,82.5,34.5,37.0,26.5,131.6875,77.5,25.0625,...,6.75,343.0,204.625,718.0,0.0,182.88,177.8,185.0,26.0,21.0
1,5,135.625,53.234375,89.953125,31.296875,141.210938,56.71875,91.882812,33.078125,109.0,...,1.09375,252.3125,115.0,890.59375,0.0,180.34,187.96,170.0,27.0,30.0
2,6,66.0,22.112305,74.9375,33.640625,66.0,29.0625,97.914062,39.5,55.0,...,5.332153,343.0,78.577091,749.5,0.0,185.42,185.42,170.0,28.0,31.0
3,6,39.21875,19.015625,37.226562,14.53125,51.5,30.742188,52.046875,28.671875,30.367188,...,0.226578,145.693754,116.292816,793.567753,0.0,177.8,185.42,170.0,28.0,34.0
4,3,74.522461,29.986572,93.299805,42.413086,102.17041,51.280762,125.859863,71.359619,58.093994,...,0.09375,110.148438,162.796875,1129.59375,0.0,160.02,167.64,135.0,34.0,31.0


In [75]:
df_feature_selection['B_Reach_cms'] = y

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_feature_selection['B_Reach_cms'] = y


In [76]:
# Export to CSV
df_feature_selection.to_csv('../Punya Andi/UFC_kombinasi3_FS_lasso.csv', index=False)
