#### __Problem Statement__
The company is trying to decide whether to focus its efforts on the mobile app experience or on the website.

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.offline import iplot

from sklearn import linear_model 
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split, KFold, cross_val_score


import warnings

# Opt in to pandas' future behaviour in which results are no longer silently downcast.
pd.set_option('future.no_silent_downcasting', True)
# Put pandas' copy-on-write mode into "warn".
# NOTE(review): presumably intended to surface code whose behaviour changes under
# copy-on-write — confirm the installed pandas version still accepts this value.
pd.options.mode.copy_on_write = "warn"

### __About Dataset__
* `Avatar`: the avatar color chosen by the customer.
* `Avg. Session Length`: the average duration of sessions (in Minutes) of Mobile and Website.
* `Time on App`: the total amount of time (in minutes) that a customer spends using the mobile app.
* `Time on Website`: the total amount of time (in minutes) that a customer spends on the website.
* `Length of Membership`: the duration of membership or loyalty of each customer (in Months)
* `Yearly Amount Spent`: the total amount of money spent by each customer on the company's products over a year.

In [None]:
# Load the customer dataset from a CSV file in the current working directory.
df = pd.read_csv('./Ecommerce Customers.csv')

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Peek at five randomly drawn rows; the fixed seed makes the draw repeatable.
df.sample(n=5, random_state=5)

__We can see that users spend more time on the website than on the mobile app__

__Hypothesis: The more time users spend on the website, the more money they spend throughout the year. We still need to test this hypothesis against the data.__

### __Data Cleaning & Wrangling__

In [None]:
# Summary statistics for the object-dtype (categorical) columns
categorical_columns = df.select_dtypes(include='object')
categorical_columns.describe()

In [None]:
# Summary statistics for the numeric columns: one row per column, rounded to two decimals
numeric_summary = df.describe().T
numeric_summary.round(2)

In [None]:
# Normalise the column names: spaces become underscores and dots are removed.
# regex=False is passed explicitly so that '.' is always treated as a literal
# dot; interpreted as a regular expression it matches any character and would
# wipe the column names entirely.
df.columns = (
    df.columns
    .str.replace(' ', '_', regex=False)
    .str.replace('.', '', regex=False)
)

In [None]:
# Shorter names for the usage, membership and spending columns.
COLUMN_RENAMES = {
    'Time_on_App': 'App_Usage',
    'Time_on_Website': 'Website_Usage',
    'Length_of_Membership': 'Membership_Length',
    'Yearly_Amount_Spent': 'Yearly_Spent',
}

# Rebind the renamed frame instead of mutating with inplace=True; keys that are
# not present in the frame are ignored by rename, so re-running this cell is safe.
df = df.rename(columns=COLUMN_RENAMES)

In [None]:
# Display the first rows of the frame.
df.head()

### __Correlation Heatmap & Charts__

In [None]:
# Pairwise correlation between the numeric columns only.
corr = df.corr(numeric_only=True)

# Heatmap of the correlation matrix, each cell annotated with its value.
# The <b> markup belongs in the title text (Plotly renders a small set of HTML
# tags inside text); a font family has to be a plain font name, so putting
# '<b>' there neither bolds the title nor selects the intended font.
# NOTE(review): px.imshow documents `aspect` as 'equal' or 'auto'; the value 1
# is kept as-is — confirm it produces the intended layout.
fig = px.imshow(
    corr,
    template='plotly_dark',
    text_auto='0.2f',
    aspect=1,
    color_continuous_scale='orrd',
    title='<b>Correlation Between Data</b>',
)

# Font of the per-cell annotations.
fig.update_traces(textfont={'size': 16, 'family': 'consolas'})

# Font of the figure title.
fig.update_layout(title={'font': {'size': 28, 'family': 'poppins'}})

iplot(fig)

In [None]:
import plotly.graph_objects as go

# Scatter-plot matrix over every numeric column, coloured by yearly spend.
# The <b> markup belongs in the title text; a font family must be a plain font
# name, so '<b>poppins' neither bolds the title nor selects the intended font.
# The former labels={'xtickangle': 90} entry was dropped: `labels` maps column
# names to display names, and no column is called 'xtickangle', so it had no
# effect. Tick rotation on a scatter matrix would have to be set per axis in
# the layout (xaxis, xaxis2, ...) if it is actually wanted.
fig = px.scatter_matrix(
    df,
    dimensions=df.select_dtypes(include='number').columns,
    height=950,
    color='Yearly_Spent',
    opacity=.70,
    title='<b>Relationships Between Numerical Data</b>',
    template='plotly_dark',
)

# Title font and horizontal anchor, set in a single layout update.
fig.update_layout(
    title={
        'font': {'size': 28, 'family': 'poppins'},
        'xanchor': 'right',
    }
)
iplot(fig)