In [None]:
import pandas as pd
import pickle
from scipy.stats import poisson
import numpy as np

### **DECLARACIÓN DE VARIABLES**

In [None]:
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load 'groups_dict' when it comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('groups_dict', 'rb') as groups_file:
    group_dict = pickle.load(groups_file)

# Tabular inputs: historical match results and the tournament fixture.
df_historical = pd.read_csv('clean_fifa_worldcup_matches.csv')
df_fixture = pd.read_csv('clean_fifa_worldcup_fixture.csv')

In [None]:
# Display the frame read from 'clean_fifa_worldcup_matches.csv' as a quick check of the load.
df_historical

### **DECLARACIÓN DE FUNCIÓN QUE GENERA MISSING VALUES**

In [None]:
def _resolve_rng(random_state):
    """Return an object exposing ``choice`` for the given ``random_state``."""
    if random_state is None:
        # Fall back to NumPy's global state so a prior np.random.seed(...) is honored.
        return np.random
    if isinstance(random_state, (np.random.Generator, np.random.RandomState)):
        return random_state
    return np.random.default_rng(random_state)


def _cell_budget(percentage, n_cells, name):
    """Convert a fraction in [0, 1] into a whole number of cells to blank out."""
    if not 0 <= percentage <= 1:
        raise ValueError(f"{name} must be between 0 and 1, got {percentage!r}")
    return int(round(percentage * n_cells))


def _uniform_mask(shape, budget, rng):
    """Boolean mask of ``shape`` with exactly ``budget`` True cells drawn uniformly."""
    mask = np.zeros(shape, dtype=bool)
    if budget:
        # Sampling flat positions without replacement guarantees distinct cells.
        mask.flat[rng.choice(mask.size, size=budget, replace=False)] = True
    return mask


def _grouped_mask(n_groups, group_size, budget, rng):
    """Boolean (n_groups, group_size) mask with exactly ``budget`` True cells.

    The budget is first split at random among the groups, then distinct
    positions are drawn inside each group.
    """
    mask = np.zeros((n_groups, group_size), dtype=bool)
    if not budget:
        return mask

    quota = np.zeros(n_groups, dtype=np.intp)
    remaining = budget
    # A group cannot receive more cells than it holds; overflow is redrawn
    # among the groups that still have room. Each pass places at least one
    # cell, and budget <= n_groups * group_size, so the loop terminates.
    while remaining:
        open_groups = np.flatnonzero(quota < group_size)
        picks = rng.choice(open_groups, size=remaining, replace=True)
        quota += np.bincount(picks, minlength=n_groups)
        np.minimum(quota, group_size, out=quota)
        remaining = budget - int(quota.sum())

    for group in np.flatnonzero(quota):
        chosen = rng.choice(group_size, size=int(quota[group]), replace=False)
        mask[group, chosen] = True
    return mask


def insert_nan(df, mcar_percentage=0.5, mar_percentage=0.5, mnar_percentage=0.5,
               random_state=None):
    """Return three copies of ``df`` with NaN values injected by different schemes.

    Each percentage is the fraction of all cells (``df.size``) that is blanked
    out in the corresponding copy: exactly ``round(percentage * df.size)``
    distinct cells are selected. Cells that are already NaN in ``df`` can be
    among the selected ones. Selection is positional, so duplicated index or
    column labels do not cause extra cells to be overwritten.

    Schemes:
        * "MCAR" copy: cells are drawn uniformly over the whole frame.
        * "MAR" copy: the budget is split at random across columns, then
          distinct rows are drawn inside each column.
        * "MNAR" copy: the budget is split at random across rows, then
          distinct columns are drawn inside each row.

    NOTE(review): none of the three schemes looks at the data values, so they
    differ only in how positions are drawn; missingness that truly depends on
    observed (MAR) or unobserved (MNAR) values is not modeled here.

    Args:
        df: A Pandas dataframe. It is not modified.
        mcar_percentage: Fraction in [0, 1] of cells set to NaN in the MCAR copy.
        mar_percentage: Fraction in [0, 1] of cells set to NaN in the MAR copy.
        mnar_percentage: Fraction in [0, 1] of cells set to NaN in the MNAR copy.
        random_state: ``None`` to use NumPy's global random state, an int seed,
            or an existing ``numpy.random.Generator`` / ``RandomState``.

    Returns:
        A tuple ``(df_mcar, df_mar, df_mnar)`` of new dataframes with the same
        shape, index and columns as ``df``.

    Raises:
        ValueError: If any percentage is outside the [0, 1] interval.
    """
    rng = _resolve_rng(random_state)
    n_rows, n_cols = df.shape

    mcar_count = _cell_budget(mcar_percentage, df.size, "mcar_percentage")
    mar_count = _cell_budget(mar_percentage, df.size, "mar_percentage")
    mnar_count = _cell_budget(mnar_percentage, df.size, "mnar_percentage")

    # DataFrame.mask returns a new frame and upcasts dtypes as needed, which
    # avoids cell-by-cell .loc writes.
    df_mcar = df.mask(_uniform_mask(df.shape, mcar_count, rng))
    # Groups are columns here, so the mask is built transposed and flipped back.
    df_mar = df.mask(_grouped_mask(n_cols, n_rows, mar_count, rng).T)
    df_mnar = df.mask(_grouped_mask(n_rows, n_cols, mnar_count, rng))

    return df_mcar, df_mar, df_mnar

### **GENERACIÓN DE DATAFRAMES CON DIFERENTES TIPOS DE MISSING VALUES**

In [None]:
# Seed NumPy's global random state so the injected NaN positions are the same
# on every Restart & Run All; change the seed to draw a different pattern.
np.random.seed(42)
# Build three copies of the historical matches, each with NaNs injected using the
# default 0.5 fraction for its scheme.
df_historical_mcar, df_historical_mar, df_historical_mnar = insert_nan(df_historical)

In [None]:
# Display the first frame returned by insert_nan (the MCAR variant).
df_historical_mcar

In [None]:
# Display the second frame returned by insert_nan (the MAR variant).
df_historical_mar

In [None]:
# Display the third frame returned by insert_nan (the MNAR variant).
df_historical_mnar