## Installing packages

In [None]:
!pip install --upgrade pip
!pip install "snowflake-connector-python[pandas]" "snowflake-snowpark-python[pandas]" snowflake-snowpark-python==1.9.0 numpy pandas matplotlib scikit-learn xgboost seaborn python-dateutil tqdm holidays faker
!pip install --upgrade --q snowflake-snowpark-python==1.9.0
!pip uninstall urllib3 -y
!pip install urllib3==1.26.15
!pip install fosforml==1.1.6

## Importing packages

In [1]:
# Imports and notebook-wide display/warning configuration.
# NOTE(review): the wildcard import below pulls an unknown set of names into the
# notebook namespace; which names later cells rely on cannot be told from here.
from fosforml import *
from fosforml.constants import MLModelFlavours
from matplotlib import pyplot as plt
import pandas as pd
# Show up to 500 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)
import seaborn as sns
from sklearn.metrics import mean_absolute_percentage_error
import numpy as np
# Silences every warning for the rest of the session (including deprecation warnings).
import warnings; warnings.simplefilter('ignore')
from joblib import dump, load
import requests
from tqdm import tqdm
import time
import calendar

from time import sleep
import configparser
from dateutil.relativedelta import relativedelta
import datetime
from dateutil.easter import easter
from scipy.optimize import minimize_scalar
from scipy.optimize import curve_fit

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Getting data from GitHub and moving it to Snowflake (SF)

In [None]:
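# URL of the CSV file on GitHub (kept for reference only; nothing below uses it).
# Note: a github.com/.../blob/... link serves an HTML page rather than the file itself;
# fetching the CSV directly needs the raw.githubusercontent.com form shown on the last line.
#url = 'https://github.com/aksh301091/fdc_akshaya_git/blob/91a0b3faf99492355d3816919fbf79de434926c3/ME_CASINO_PRJ/customer_table.csv'
#'https://raw.githubusercontent.com/username/repository/branch/filename.csv'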

In [2]:
# Obtain the session object that later cells use to create, write and query tables.
# NOTE(review): presumably a Snowpark Session whose connection details are supplied
# by fosforml — confirm; nothing in this notebook configures credentials.
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [3]:
# Read the customer CSV into a pandas DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
cust_df = pd.read_csv('customer_table.csv')
#trx_df = pd.read_csv('transaction_table.csv')

#cust_df.info()

In [7]:
# Load the three transaction CSV parts, one pandas DataFrame per file.
# Paths are relative to the notebook's working directory.
t1_df, t2_df, t3_df = [
    pd.read_csv(csv_path)
    for csv_path in ('trx_1.csv', 'trx_2.csv', 'trx_3.csv')
]

In [5]:
# Sanity check: display the type of the object returned by pd.read_csv.
type(cust_df)

pandas.core.frame.DataFrame

In [None]:
# Turn the pandas customer frame into a session-side DataFrame and save it as the
# table "casino_customers", using write mode "overwrite".
# NOTE(review): presumably "overwrite" replaces an existing table of the same name
# (Snowpark semantics) — confirm before running against shared schemas.
cust_sfdf = my_session.createDataFrame(cust_df)
cust_sfdf.write.mode("overwrite").save_as_table("casino_customers")
#my_session.table("casino_customers").show()

In [11]:
# Stack the first two transaction parts into one frame with a fresh 0..n-1 index.
# pd.concat is the public API for this; DataFrame._append is a private pandas
# method with no stability guarantee and should not be called from user code.
inter_df = pd.concat([t1_df, t2_df], ignore_index=True)

In [12]:
# Add the third transaction part to the intermediate frame, renumbering the index.
# Uses the public pd.concat instead of the private DataFrame._append method.
trx_df = pd.concat([inter_df, t3_df], ignore_index=True)

In [15]:
# Print the column / non-null count / dtype summary of the combined transaction frame.
trx_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 24 columns):
 #   Column                              Non-Null Count   Dtype  
---  ------                              --------------   -----  
 0   TRANSACTION_ID                      200000 non-null  int64  
 1   DATE                                200000 non-null  object 
 2   PLAYER_ID                           200000 non-null  int64  
 3   PLAYER_AGE                          200000 non-null  int64  
 4   PLAYER_GENDER                       200000 non-null  object 
 5   HOME_COUNTRY                        200000 non-null  object 
 6   HOME_CITY                           200000 non-null  object 
 7   GAME_CATEGORY                       200000 non-null  object 
 8   GAME_NAME                           200000 non-null  object 
 9   TABLE_MINIMUM_BET                   200000 non-null  float64
 10  IS_PREMIUM_PLAYER                   200000 non-null  bool   
 11  DURATION_SPENT            

In [14]:
# Turn the combined pandas transaction frame into a session-side DataFrame and save it
# as the table "casino_transactions", using write mode "overwrite".
# NOTE(review): presumably "overwrite" replaces an existing table of the same name — confirm.
trx_sfdf = my_session.createDataFrame(trx_df)
trx_sfdf.write.mode("overwrite").save_as_table("casino_transactions")

In [16]:
## Reading data from SF

# Pull the whole customer table back through the session and materialise it as pandas.
table_name = 'CASINO_CUSTOMERS'
query = "select * from {}".format(table_name)
customer_df = my_session.sql(query).to_pandas()
type(customer_df)

pandas.core.frame.DataFrame

In [17]:
# Pull the whole transaction table back through the session and materialise it as pandas.
table_name = 'CASINO_TRANSACTIONS'
query = "select * from {}".format(table_name)
transaction_df = my_session.sql(query).to_pandas()
type(transaction_df)

pandas.core.frame.DataFrame

## Data Exploration

In [18]:
# Descriptive statistics for the customer and transaction tables;
# include='all' covers non-numeric columns as well as numeric ones.
for frame in (customer_df, transaction_df):
    print(frame.describe(include='all'))

          PLAYER_ID           AGE GENDER HOME_COUNTRY  HOME_CITY  \
count   10000.00000  10000.000000  10000        10000      10000   
unique          NaN           NaN      2            4         31   
top             NaN           NaN   Male           US  Singapore   
freq            NaN           NaN   5009         2527       2485   
mean     5000.50000     60.339200    NaN          NaN        NaN   
std      2886.89568     14.311612    NaN          NaN        NaN   
min         1.00000     21.000000    NaN          NaN        NaN   
25%      2500.75000     50.000000    NaN          NaN        NaN   
50%      5000.50000     63.000000    NaN          NaN        NaN   
75%      7500.25000     73.000000    NaN          NaN        NaN   
max     10000.00000     80.000000    NaN          NaN        NaN   

       DATE_FIRST_VISIT DATE_LAST_VISIT  TOTAL_NUMBER_OF_VISITS  \
count             10000           10000            10000.000000   
unique              326             316          

In [19]:
# Info about dataframe
print(customer_df.info())
print(transaction_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 25 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   PLAYER_ID                                10000 non-null  int16  
 1   AGE                                      10000 non-null  int8   
 2   GENDER                                   10000 non-null  object 
 3   HOME_COUNTRY                             10000 non-null  object 
 4   HOME_CITY                                10000 non-null  object 
 5   DATE_FIRST_VISIT                         10000 non-null  object 
 6   DATE_LAST_VISIT                          10000 non-null  object 
 7   TOTAL_NUMBER_OF_VISITS                   10000 non-null  int8   
 8   TOTAL_DURATION_SPENT                     10000 non-null  float64
 9   AVERAGE_DURATION_PER_VISIT               10000 non-null  float64
 10  TOTAL_CHIPS_WON_OR_LOST                  10000 