<a href="https://colab.research.google.com/github/dal7collab/google_collab_py/blob/main/indian_food.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IMPORT FILE FROM DRIVE

In [None]:
# Google user authentication: run Colab's sign-in flow for the current user
from google.colab import auth
auth.authenticate_user()

In [None]:
# Google credentials authentication: build an authorized gspread client (`gc`)
# from the application-default credentials
# NOTE(review): oauth2client is deprecated and newer gspread releases expect
# google-auth credentials — confirm against the installed versions
import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())

In [None]:
# Import the Google Sheet into a Colab data object: open the spreadsheet named
# `file_name`, take its first worksheet (`sheet1`) and read all cell values
file_name = 'indian_food' 
cn = gc.open(file_name).sheet1
sh = cn.get_all_values()
print(sh[0:1])  # show only the first row as a quick sanity check

# PANDAS

In [None]:
# Convert the sheet values (`sh`) into a pandas DataFrame
import pandas as pd
df_sh = pd.DataFrame.from_records(sh)

In [None]:
# Promote the first row to column labels and keep the remaining rows as data
header = df_sh.iloc[0]
df_sh = df_sh.iloc[1:]
df_sh.columns = header

In [None]:
# Copy the data frame: later cells work on `df`, a deep copy of `df_sh`
df = df_sh.copy(deep = True)

In [None]:
# Obtain data frame general info:
# column names, non-null counts, data types
df.info()

In [None]:
# Retrieve the first n rows (n = 2 here)
n = 2
df.head(n)
# df.tail(n)  # alternative: the last n rows

In [None]:
# Describe one data frame series (summary statistics of the `name` column)
name = 'course'
df[name].describe()

In [None]:
# Obtain the unique values of one data frame series (the `name` column)
name = 'course'
df[name].unique()

In [None]:
# Cast the prep_time column from object to int32
df['prep_time'] = df['prep_time'].astype('int32')

In [None]:
# Check the series summary again, this time for the `prep_time` column
name = 'prep_time'
df[name].describe()

In [None]:
# Function Casting Column Type
def cast_type(data_frame, column_name, type_name):
    """Cast one column of ``data_frame`` to ``type_name`` in place.

    Args:
        data_frame: DataFrame whose column is converted; it is modified
            in place.
        column_name: Label of the column to convert. Any label type is
            accepted, not only strings.
        type_name: Target dtype, passed to ``astype`` (for example
            ``'int32'``).

    Raises:
        KeyError: If ``column_name`` is not a column of ``data_frame``.
        ValueError: Propagated from ``astype`` when a value cannot be
            converted to ``type_name``.
    """
    data_frame[column_name] = data_frame[column_name].astype(type_name)
    # Format the label instead of concatenating it, so a non-string label
    # (e.g. the integer labels of a header-less frame) does not raise after
    # the cast has already been applied.
    print(f'{column_name}:ok')

In [None]:
# Function run: cast the cook_time column to int32 with cast_type, then show
# the summary of the converted column
column_name = 'cook_time'
cast_type(df, column_name, 'int32')
df[column_name].describe()

In [None]:
# Simple statistics of the numerical columns (describe() with its defaults)
df.describe()

In [None]:
# Simple statistics of the categorical columns ('O' selects object dtype)
df.describe(include=['O'])

In [None]:
# Add a cook_time_none column to a fresh copy of df, holding cook_time with
# the -1 placeholder replaced by np.nan
import numpy as np
df_rp = df.copy(deep=True)
df_rp['cook_time_none'] = df_rp['cook_time'].replace(-1, np.nan)

In [None]:
# Info of the source data frame (`df`), for comparison with `df_rp`
df.info()

In [None]:
# Info of the current data frame (`df_rp`) after the -1 values were replaced
# with NaN
df_rp.info()

In [None]:
# Start again from a deep copy of df and turn every -1 placeholder, numeric
# or text, into np.nan across the whole data frame
import numpy as np
df_rp = df.copy(deep=True)
df_rp = df_rp.replace([-1, '-1'], np.nan)

In [None]:
# Count the rows whose `name` column is NaN
name = 'state'
s = df_rp[df_rp[name].isna()]
len(s)

# MISSING VALUES REPLACEMENT

In [None]:
# Function replaces np.nan values in all data frame with relevant statistics (mean, mode)
def rep_missings(data_frame):
    """Return a copy of ``data_frame`` with missing values filled per column.

    Numeric columns are filled with the column mean; every other column is
    filled with its most frequent value (the first mode). Columns without
    missing values, and columns holding only missing values, are left as
    they are.

    Args:
        data_frame: DataFrame to process; it is not modified.

    Returns:
        A deep copy of ``data_frame`` with the missing values replaced.
    """
    filled = data_frame.copy(deep=True)
    for column in data_frame.columns:
        series = data_frame[column]
        if not series.isna().any():
            continue
        # Test for "numeric" instead of the exact dtype 'int32': an integer
        # column is upcast to float64 as soon as it holds NaN, so an int32
        # check can never match a column that actually needs filling.
        is_numeric = (pd.api.types.is_numeric_dtype(series)
                      and not pd.api.types.is_bool_dtype(series))
        if is_numeric:
            fill_value = series.mean()  # mean() skips missing values
        else:
            modes = series.mode()  # mode() ignores missing values
            if modes.empty:
                # Only missing values: there is nothing to fill with.
                continue
            fill_value = modes[0]
        # Assign the filled column back; an in-place fillna on `filled[column]`
        # is a chained update that may act on a temporary copy.
        filled[column] = filled[column].fillna(fill_value)
    return filled

In [None]:
# Replace the np.nan values of the whole data frame with the statistics
# chosen by rep_missings (mean, mode)
df_rp = rep_missings(df_rp)

In [None]:
# List the unique values of the `name` column after the replacement
name = 'region'
a = df_rp[name].unique()
a
# a[0]
# a[a.size - 1]

# EXPORT FILE TO DRIVE

In [None]:
# Mount Google Drive in Google Colab at the /drive mount point
from google.colab import drive
drive.mount('/drive')

In [None]:
# Export the data frame to Google Drive in comma-separated format
# NOTE(review): to_csv is called with its defaults, so the row index is
# presumably written as the first column — confirm that is wanted
df_rp.to_csv('/drive/My Drive/indian_food_nulls.csv')
print("File was exported succesfully.")

# BI DASHBOARDS OPTIMIZATION

In [None]:
# List the unique values of the `name` column
name = 'region'
a = df_rp[name].unique()
a

In [None]:
# BI INSIGHTS ROBOT
def bi_robot(dataframe, dimensions, measure):
    """Pick one value per dimension and print the measure's mean for them.

    Student work in progress (lecturer's first preparations, hints and
    thoughts): the function is meant to return anomalous tuples that can
    serve as default values in BI dashboards and support further dashboard
    conclusions. For now it takes the first unique value of every
    dimension, filters the rows by all of them and prints the mean of
    ``measure`` over the rows that remain.

    Args:
        dataframe: DataFrame holding the dimension and measure columns; it
            is not modified.
        dimensions: Column labels to filter by. May be empty, in which case
            the mean is taken over all rows.
        measure: Label of the column whose mean is printed.

    Returns:
        A dict mapping each dimension to the value selected for it.
    """
    selection = {d: dataframe[d].unique()[0] for d in dimensions}

    subset = dataframe
    for column, chosen in selection.items():
        subset = subset.loc[subset[column].isin([chosen])]

    # Compute the mean once, after every filter is applied; doing it inside
    # the loop left `stat` undefined when no dimension was given.
    stat = subset[measure].mean()

    print('---------------------')
    print(selection, end = '')
    print(' Mean:' + "{:.2f}".format(stat))
    print('---------------------')
    return selection


In [None]:
df         = df_rp.copy(deep = True)
filters    = ['diet', 'course']
measure    = 'prep_time'
# bi_robot returns a dict of selected filter values, so keep it under its own
# name instead of overwriting the DataFrame `df` with it
bi_defaults = bi_robot(df, filters, measure)
bi_defaults

---------------------
{'diet': 'vegetarian', 'course': 'dessert'} Mean:28.08
---------------------


{'course': 'dessert', 'diet': 'vegetarian'}