In [58]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from lightgbm import LGBMRegressor, LGBMClassifier


def cols_to_impute(df):
    cols = []
    for col in df.columns:
        if df[col].isnull().sum() != 0:
            cols.append(col)
    return cols


def complete_columns(df):
    cols = []
    for col in df.columns:
        if df[col].isnull().sum() == 0:
            cols.append(col)
    return cols


def missing_indices(df):
    indices = {}
    for col in cols_to_impute(df):
        indices[col] = df[df[col].isnull()].index.tolist()
    return indices

def main(path):
    df = pd.read_csv(path)
    numeric_cols = df.select_dtypes(include='number').columns
    categorical_cols = df.select_dtypes(exclude='number').columns

    missing_cols = cols_to_impute(df)
    complete_cols = complete_columns(df)

    numeric_missing_cols = list(set(missing_cols) & set(numeric_cols))
    categorical_missing_cols = list(
        set(missing_cols) & set(categorical_cols))

    df_numeric = df.select_dtypes(include='number')
    print(df_numeric.shape)
    df_categoric = df.select_dtypes(exclude='number')

    # Train and Test df
    train_df_numeric = df_numeric.dropna()
    train_df_categoric = df_categoric.dropna()

    pred = {}

    for i, target_column in enumerate(numeric_missing_cols):
        print(f'target column: {target_column}')
        X_train = train_df_numeric.drop(columns=[target_column])
        y_train = train_df_numeric[[target_column]]

        test_df = df_numeric[df_numeric[target_column].isnull()]
        X_test = test_df.drop(columns=[target_column])

        print(f'Fitting {i+1}/{len(numeric_missing_cols)} columns')
        lgbm_numeric = LGBMRegressor()
        lgbm_numeric.fit(X_train, y_train)
        print(f'{i+1}/{len(numeric_missing_cols)} columns fitted')
        pred[target_column] = lgbm_numeric.predict(X_test)
        
        print(missing_indices(df_numeric)[target_column][-1])
        print(len(pred[target_column]))
        for i, index in enumerate(missing_indices(df_numeric)[target_column]):
            df_numeric.loc[index, target_column] = pred[target_column][i]

  

    return df_numeric.info()


# main('data\df.csv', 'lgbm')

In [59]:
# df1 = read_data('data\df.csv')
main('data\df.csv')

(407688, 12)
target column: down
Fitting 1/7 columns
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011674 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 727
[LightGBM] [Info] Number of data points in the train set: 346515, number of used features: 11
[LightGBM] [Info] Start training from score 2.002488
1/7 columns fitted
407687
61154
target column: FirstDown
Fitting 2/7 columns
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004611 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 730
[LightGBM] [Info] Number of data points in the train set: 346515, number of used features: 11
[LightGBM] [Info] Start training from score 0.317631
2/7 columns fitted
407680
28811
target column: GoalToGo
Fi

In [42]:
df1 = pd.read_csv('data\df.csv')
missing_indices(df1)

{'down': [0,
  36,
  68,
  71,
  73,
  74,
  77,
  79,
  80,
  83,
  87,
  90,
  123,
  132,
  147,
  152,
  157,
  160,
  163,
  165,
  166,
  176,
  178,
  187,
  209,
  210,
  219,
  220,
  231,
  243,
  245,
  247,
  255,
  256,
  268,
  269,
  286,
  287,
  291,
  315,
  322,
  323,
  338,
  343,
  346,
  347,
  349,
  350,
  356,
  357,
  365,
  366,
  376,
  392,
  394,
  396,
  397,
  418,
  420,
  421,
  425,
  430,
  434,
  435,
  436,
  437,
  438,
  449,
  452,
  453,
  462,
  463,
  475,
  483,
  485,
  486,
  492,
  495,
  508,
  509,
  515,
  526,
  531,
  532,
  540,
  543,
  561,
  564,
  569,
  575,
  576,
  597,
  603,
  608,
  609,
  612,
  614,
  616,
  626,
  627,
  655,
  660,
  661,
  664,
  665,
  683,
  684,
  691,
  698,
  699,
  702,
  703,
  735,
  738,
  740,
  769,
  770,
  773,
  777,
  782,
  784,
  785,
  799,
  800,
  823,
  833,
  839,
  840,
  852,
  859,
  861,
  865,
  866,
  896,
  911,
  923,
  924,
  928,
  931,
  934,
  936,
  939,
  941,
  94