In [None]:
# Last amended: 19th March, 2021
# Data source: Kaggle: https://www.kaggle.com/fayomi/advertising
#
# Spyder note: Use F9 in Spyder to execute a line and advance
#
# objectives:
#           Discover relationships and existence of pattern
#           in data
#              i)  Feature Engineering for categorical variables
#             ii)  Extracting datetime components
#             iii) Behaviour of 'apply' vs 'map' for Series
#              iv)  Learning to draw various types of graphs
#               v)  Conditional plots using catplot
#              vi)  Relationship plots using relplot
#             vii)  Learning seaborn plots
#            viii)  Looking at structure in data
#                       a. Parallel coordinates plots
#                       b. Radviz plots
#                       c. Andrews curves
# Good reference: https://seaborn.pydata.org/introduction.html


In [None]:

# 1.0 Call libraries
#%reset -f
# 1.1 For data manipulations
import numpy as np
import pandas as pd
# 1.2 For plotting
import matplotlib.pyplot as plt
#import matplotlib
#import matplotlib as mpl     # For creating colormaps
import seaborn as sns
# 1.3 For data processing
from sklearn.preprocessing import StandardScaler
# 1.4 OS related
import os

# 1.5
#%matplotlib qt5
%matplotlib inline


In [None]:
# 1.6 Echo the value of every top-level expression in a cell,
#     not just the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [None]:
# 1.7 Go to the folder containing the data file.
#     The folder can be overridden with the ADVERTISING_DATA_DIR
#     environment variable, so the notebook is not tied to one machine.
#     When the variable is not set, the original location is used.
#     Other locations used earlier:
#os.chdir("/home/ashok/datasets/advertising")
#os.chdir("C:\\Users\\Administrator\\OneDrive\\Documents\\advertising")
data_dir = os.environ.get(
                          "ADVERTISING_DATA_DIR",
                          "D:\\data\\OneDrive\\Documents\\advertising"
                         )
os.chdir(data_dir)

#os.listdir()            # List all files in the folder

# 1.7.1 Read file and, while reading it,
#       convert 'Timestamp' to datetime type
ad = pd.read_csv("advertising.zip",
                  parse_dates = ['Timestamp']    # parse this column as datetime
                  )

# Alternatively, reload the processed file written by later cells:
#ad = pd.read_csv("ad_mod.csv",
#                  parse_dates = ['Timestamp']
#                  )

In [None]:
# 1.8 Data type of every attribute
ad.dtypes

In [None]:
# 1.9 How many attributes are there of each data type
ad.dtypes.value_counts()

In [None]:
# 1.10 Show up to 100 columns when a DataFrame is displayed.
#      The exact option name is used: set_option() treats its first
#      argument as a pattern, so an inexact key such as
#      "display.max.columns" works only through pattern matching.
pd.set_option("display.max_columns", 100)

In [None]:
# 1.11 First look at the data
ad.head(3)              # Top three rows
ad.info()               # Per-column dtype and non-null count,
                        #   plus memory taken by the dataset
ad.shape                # (1000, 10)
ad.columns.to_numpy()   # Column names as an array
ad.shape[0]             # Number of rows, same as len(ad)


In [None]:
##
## Shortcut
##
#######################################################################
# ALL data changes at one place.
# Every derived column is computed from the raw columns ('Age',
# 'AreaIncome', 'AdTopicLine', 'Timestamp'); no derived column is first
# written in one form and then overwritten in another.
# The same steps are repeated, one at a time, in the cells below.
#######################################################################

## AA. Old name -> new name (no spaces in the new names)
new_col_names  = {
                 'Daily Time Spent on Site' :  'DailyTimeSpentonSite',
                 'Area Income'              :  'AreaIncome',
                 'Daily Internet Usage'     :  'DailyInternetUsage',
                 'Clicked on Ad'            :  'Clicked_on_ad',
                 'Male'                     :  'Gender',
                 'Ad Topic Line'            :  'AdTopicLine'
                }

## AB. Rename the columns in place
ad.rename(
         columns = new_col_names,
         inplace = True,
         )

## AC. Discretise 'Age' into three equal-width bins
ad["age_cat"] = pd.cut(
                       ad['Age'],
                       bins = 3,           # Else devise your bins: [0,20,60,110]
                       labels= ["y", "m", "s"]
                      )

## AD. Discretise 'AreaIncome' into three equal-width bins
ad["area_income_cat"] = pd.cut(
                               ad['AreaIncome'],
                               bins = 3,
                               labels= ["l", "m", "h"]
                               )

## AE. Number of characters in each ad topic line.
##     The vectorised .str accessor replaces a per-row Python lambda
##     and gives NaN (instead of raising) for a missing topic line.
ad['AdTopicLineLength'] = ad['AdTopicLine'].str.len()

## AF. First word of the topic line is taken as the ad category
fx = ad['AdTopicLine'].str.split(" ").str[0]
vc = fx.value_counts()          # How often each first word occurs
ad['ad_cat'] = fx
ad['ad_cat'].map(vc)            # Displayed only; the counts are not stored

# AG. Month number; it is mapped to a quarter in step AM
ad['month']        = ad['Timestamp'].dt.month

# AH. Labels for the parts of a day
l1 = ["earlymorning", "morning", "afternoon", "evening", "night","latenight"]

# AI. Hour of day, binned straight from 'Timestamp'.
#     Bins are right-inclusive (pd.cut default):
#     (-1,6], (6,12], (12,17], (17,20], (20,22], (22,24]
ad["hourOfDay"] = pd.cut(
                     ad['Timestamp'].dt.hour,
                     bins = [-1,6,12,17,20,22,24],
                     labels = l1
                     )

# AJ. Weekday number -> weekday name
mymap = {
           0 : 'Monday',
           1 : 'Tuesday',
           2: 'Wednesday',
           3: 'Thursday',
           4: 'Friday',
           5: 'Saturday',
           6: 'Sunday'
         }

# AK. Weekday name, mapped straight from 'Timestamp'
ad['weekday'] = ad['Timestamp'].dt.weekday.map(mymap)

# AL. Labels for the quarters
lq = ["quarter1", "quarter2", "quarter3", "quarter4"]

# AM. Months 1-3 -> quarter1, 4-6 -> quarter2, and so on
ad["quarter"] = pd.cut(
                     ad['month'],
                     bins = [0,3,6,9,12],
                     labels = lq
                     )

# AN. Save the modified data and show its first rows
ad.to_csv('ad_mod.csv', index=False)
ad.head()

In [None]:
# 2.0 Rename few column names:
new_col_names  = {
                 'Daily Time Spent on Site' :  'DailyTimeSpentonSite',
                 'Area Income'              :  'AreaIncome',
                 'Daily Internet Usage'     :  'DailyInternetUsage',
                 'Clicked on Ad'            :  'Clicked_on_ad',
                 'Male'                     :  'Gender',
                 'Ad Topic Line'            :  'AdTopicLine'
                }
# 2.1
ad.rename(
         columns = new_col_names,
         inplace = True,
         #axis = 1             # Note the axis keyword. By default it is axis = 0
         )

# 2.2
ad.head(3)
ad.columns.values

# 2.3 Write to CSV
ad.to_csv('ad_mod.csv', index=False)
%pwd

### Categorical data

#### Examine Categorical data

In [None]:
# 3.0 Number of levels in the categorical column 'City'
#     and how many rows each level has

ad['City'].unique().size                # 969 cities out of 1000
ad['City'].value_counts()
#ad['City'].unique()

In [None]:
# 3.1 How many countries
ad['Country'].unique().size             # 237 countries
ad['Country'].value_counts()            # Mostly 2 per country

Seaborn Countplot  
Show the counts of observations in each categorical bin using bars.  

> `seaborn.countplot(*, x=None, y=None, hue=None, data=None, order=None, hue_order=None, orient=None, color=None, palette=None, saturation=0.75, dodge=True, ax=None, **kwargs)`

In [None]:
# 3.2 Distribution of gender
#     Counts are 519:481
#     The earlier note here read: 1 = Female, 0 = Male.
#     NOTE(review): this column was called 'Male' before renaming,
#     which suggests 1 = Male -- confirm against the data source.
ad.loc[:, 'Gender'].value_counts()


In [None]:
# 3.2 Distribution of gender, graphically:
#     one bar per level of 'Gender'

_ = sns.countplot(
                   data = ad,
                   x = 'Gender'
                  )

In [None]:
# 3.3 Distribution of clicks:
#     1 and 0 occur in the ratio of 500:500.
#     This is highly optimistic. Generally clicks may be 1%

ad.loc[:, 'Clicked_on_ad'].value_counts()


In [None]:
# 3.3 Distribution of clicks, graphically:
#     one bar per level of 'Clicked_on_ad'

_ = sns.countplot(
                   data = ad,
                   x = 'Clicked_on_ad'
                  )

In [None]:
# 3.4 Distribution of clicks, Gender-wise:
#
#     Author's reading of the plot: among those who clicked on
#     the ad, males are a little more than females, but the
#     difference in clicking habits does not appear significant.
#
#     Try hue = 'age_cat' and compare that plot with the next one

_ = sns.countplot(
                   data = ad,
                   x = 'Clicked_on_ad',    # One group of bars per click value
                   hue = 'Gender'          # One bar per gender inside each group
                  )

In [None]:
# 3.4.1 Another way to draw a bar-plot.
#       Here bar height is a summary of 'Age' (the mean, unless
#       another estimator is given) for each click value,
#       not a count of rows.
#       Which one is more informative: the earlier plot or this one?

_ = sns.barplot(
                 data = ad,
                 x = 'Clicked_on_ad',    # Categorical: one bar per level
                 y = 'Age'               # Continuous: summarised per bar
                )

#### Cat feature Engineering

In [None]:
# 4.1 Discretise continuous columns
#     bins = 3 gives three equal-width bins, as against
#     equal data-points bins (quantile) or kmeans clusters.
#     Alternatively use KBinsDiscretizer of sklearn, or
#     devise your own bin edges, e.g. [0,20,60,110]

ad["age_cat"] = pd.cut(
                       x = ad['Age'],
                       bins = 3,
                       labels = list("yms")    # i.e. ["y", "m", "s"]
                      )

# 4.1.1 The new column is the last one
ad.head(2)


In [None]:
# 4.2 Bin AreaIncome into three equal-width bins
ad["area_income_cat"] = pd.cut(
                               x = ad['AreaIncome'],
                               bins = 3,
                               labels = list("lmh")    # i.e. ["l", "m", "h"]
                               )
# 4.2.1 The new column is the last one
ad.head(2)

In [None]:
# 4.3 Create a new column holding the length of each ad-line.
#     For a function of one element, Series.apply() and
#     Series.map() give the same result; both are shown.

ad['AdTopicLineLength'] = ad['AdTopicLine'].apply(len)
ad['AdTopicLineLength'] = ad['AdTopicLine'].map(len)

# 4.3.1
ad.head(2)

In [None]:
# 4.3.2 Save your work:
ad.to_csv('ad_mod.csv', index=False)
%pwd

In [None]:
# 4.4 Distribution of clicks, age-wise and area-income-wise.
#     One panel per age category; inside a panel, one bar per
#     area-income category for each click value.
_ = sns.catplot(
                 data = ad,
                 kind = 'count',
                 x = 'Clicked_on_ad',
                 hue = 'area_income_cat',
                 col = 'age_cat'
                )

In [None]:
## Which ad category? And how many times does it occur?
## INCLUDE THIS IDEA
## The first word of the topic line is taken as the category.
fx = ad['AdTopicLine'].str.split(" ").apply(lambda tokens: tokens[0])
vc = fx.value_counts()      # Occurrences of each first word
vc.iloc[:4]                 # The four most frequent ones
ad['ad_cat'] = fx
ad['ad_cat'].map(vc)        # Count for each row; displayed, not stored

In [None]:
# 5.0 Extract date components using the Series.dt accessor
#     https://pandas.pydata.org/pandas-docs/stable/reference/series.html#api-series-dt
#     https://pandas.pydata.org/pandas-docs/stable/reference/series.html#datetime-properties

# 5.1 What is the type of 'dt'?
#     pandas.core.indexes.accessors.DatetimeProperties
type(ad.Timestamp.dt)

# 5.2 Extract hour, weekday and month.
#     'dayofweek' is the same property as 'weekday' (Monday is 0).
#     The month is mapped to a quarter further below.
ad['hourOfDay']    = ad.Timestamp.dt.hour
ad['weekday']      = ad.Timestamp.dt.dayofweek
ad['month']        = ad.Timestamp.dt.month

# 5.2.1
ad.head(2)

In [None]:
# 5.3 Cut hour to morning, evening, night etc
#     For example 0 to 6am is earlymorning

# 5.3.1 For easy interpretation of graphs, use l1
l1 = ["earlymorning", "morning", "afternoon", "evening", "night","latenight"]


# 5.3.2
ad["hourOfDay"] = pd.cut(
                     ad['hourOfDay'],
                     bins = [-1,6,12,17,20,22,24],
                     labels = l1
                     )

# 5.3.3
ad.head(2)

# 5.3.4
ad.to_csv('ad_mod.csv', index=False)
%pwd

In [None]:

# 5.4 Similarly for weekdays
#     Map weekday numbers to weekday names
#     We use Series.map() method

mymap = {
           0 : 'Monday',
           1 : 'Tuesday',
           2: 'Wednesday',
           3: 'Thursday',
           4: 'Friday',
           5: 'Saturday',
           6: 'Sunday'
         }


# 5.4.1 For easy interpretation of weekdays in graphs
ad['weekday'] = ad['weekday'].map(mymap)

# 5.4.2
ad.head(2)

# 5.4.3
ad.to_csv('ad_mod.csv', index=False)
%pwd

In [None]:
# 5.5 Divide months into qurters

# 5.5.1 For easy interpretation of graphs, use l1
lq = ["quarter1", "quarter2", "quarter3", "quarter4"]


# 5.5.2
ad["quarter"] = pd.cut(
                     ad['month'],
                     bins = [0,3,6,9,12],
                     labels = lq
                     )

# 5.5.3
ad[['month', 'quarter']].head(3)

# 5.5.4
ad.to_csv('ad_mod.csv', index=False)
%pwd

In [None]:
# 5.6 So finally, what are the column names?
#     Shape is now (1000, 18); earlier it was (1000, 10)
ad.columns.to_numpy()
ad.shape

In [None]:
##################
# 6 Plotting
##################
# A summary of syntax of important plots
#---------------------------------------
# 1. sns.displot()
#    (note: sns.distplot() is deprecated)
# https://seaborn.pydata.org/generated/seaborn.distplot.html
# displot(data=None, *, x=None, y=None, hue=None, row=None,
#         col=None, weights=None, kind='hist', rug=False,
#         rug_kws=None, log_scale=None, legend=True, palette=None,
#         hue_order=None, hue_norm=None, color=None, col_wrap=None,
#         row_order=None, col_order=None, height=5, aspect=1, facet_kws=None,
#         **kwargs)
#         kind: 'hist', 'kde', 'ecdf'
###$$$$
# Note: 'displot' does NOT take an 'ax' argument. So if you need to use the 'ax'
#       argument, use: histplot, kdeplot, ecdfplot
###$$$$

# 2. sns.jointplot()
# http://seaborn.pydata.org/generated/seaborn.jointplot.html
# jointplot(x, y, data=None, kind='scatter',
#          stat_func=None, color=None, height=6, ratio=5, space=0.2,
#          dropna=True, xlim=None, ylim=None, joint_kws=None, marginal_kws=None,
#          annot_kws=None, **kwargs)
#         kind: { “scatter” | “kde” | “hist” | “hex” | “reg” | “resid” }
#
###$$$$
# Note: 'jointplot' does NOT take an 'ax' argument. So if you need to use the 'ax'
#       argument, use: scatterplot, kdeplot, histplot, regplot, residplot
###$$$$
#
# 3. sns.relplot()
# https://seaborn.pydata.org/generated/seaborn.relplot.html
# relplot(x=None, y=None, hue=None, size=None, style=None, data=None, row=None,
#        col=None, col_wrap=None, row_order=None, col_order=None, palette=None,
#        hue_order=None, hue_norm=None, sizes=None, size_order=None, size_norm=None,
#        markers=None, dashes=None, style_order=None, legend='brief', kind='scatter',
#        height=5, aspect=1, facet_kws=None, **kwargs)
#        kind: 'scatter' or 'line'
#
# 4. sns.catplot()
# https://seaborn.pydata.org/generated/seaborn.catplot.html
# catplot(*, x=None, y=None, hue=None, data=None, row=None,col=None, col_wrap=None,
#         estimator=<function mean at 0x7fecadf1cee0>, height=5, aspect=1, orient=None,
#         ci=95, n_boot=1000, units=None, seed=None, order=None, hue_order=None,
#         row_order=None, col_order=None, kind='strip',sharex=True, sharey=True,
#         color=None, palette=None, legend=True, legend_out=True,
#         margin_titles=False, facet_kws=None, **kwargs )
#         kind:  “strip”, “swarm”, “box”, “violin”, “boxen”, “point”, “bar”, or “count”
#
###$$$$
# Note: 'catplot' does NOT take an 'ax' argument. So if you need to use the 'ax'
#       argument, use: stripplot, swarmplot, boxplot, violinplot, boxenplot, barplot, countplot
###$$$$
#
# 5. sns.barplot()
#    https://seaborn.pydata.org/generated/seaborn.barplot.html
# barplot(*, x=None, y=None, hue=None, data=None, order=None, hue_order=None,
#         estimator=<function mean at 0x7fecadf1cee0>, ci=95, n_boot=1000,
#         units=None, seed=None, orient=None, color=None, palette=None,
#         saturation=0.75, errcolor='.26', errwidth=None, capsize=None,
#         dodge=True, ax=None, **kwargs)
#
###$$$$
# Note: For plotting counts of a single cat feature, use 'countplot'
#       For summarising another continuous function, against cat-feature
#       use barplot with estimator of np.sum, np.mean etc
###$$$$
#


In [None]:
####################################
## Plotting questions that we will answer
####################################
#
## 1 Understand your numeric data
##   How is it distributed.

# Question 1: How is Age distributed?
# Question 2: How is DailyTimeSpentonSite distributed?
# Question 3: How is AreaIncome distributed?
# Question 4: Use for loop to draw the distribution plots for the following
#             columns = ['Age', 'AreaIncome', 'DailyInternetUsage', 'DailyTimeSpentonSite']

# 2.0 Relationship of numeric variable with a categorical variable

# Question 5: How is 'Age' related to clicking?
# Question 6: How is DailyInternetUsage related to clicking?
# Question 7: How is 'AreaIncome' related to clicking?
# Question 8: Draw all the following relationship plots at one go:
#               columns = ['Age', 'AreaIncome', 'DailyInternetUsage', 'DailyTimeSpentonSite']
#               catVar = ['Clicked_on_ad', 'age_cat' ]

# 3.0 Relationship of numeric to numeric variables
#     Using jointplots:

# Question 9:  Show joint distribution of DailyTimeSpentonSite and AreaIncome
# Question 10: Show joint distribution of DailyInternetUsage and DailyTimeSpentonSite
# Question 11: Show these plots as kernel density as also 'hex' as also
#              draw regression line

# 4.0 Relationship of a categorical to another categorical variable

# Question 12: What relationship exist between 'Clicked_on_ad' and 'Gender'?
# Question 13: What relationship exist between 'DailyTimeSpentonSite' and 'Gender'?
# Question 14: Relationship between Gender and Clicked_on_ad, subset by 'age_cat wise

# 5.0 Relationship between two categorical and one numeric variable

# Question 15: Hour and weekday wise when are clicks most
# Question 16: Quarter wise and weekday wise when are clicks most
# Question 17: Quarter wise and weekday wise when are DailyInternetUsage max and min

# 6.0 Structure in data
# Question 18: Does data exhibit any pattern with respect to 'Clicked_on_ad'
#              Explore how good the patterns are. Stronger patterns will lead
#              to better classifications
#


In [None]:
###### Start answering questions

In [None]:
# 7.0.1 Sample data:
#       This step is academic here. But for large datasets,
#       there is a need to sample data before plotting so
#       that they do not crowd limited X-Y space.
#       random_state is fixed so that the same rows are drawn
#       on every run.

dn = ad.sample(frac = 0.5, random_state = 42)    # Extract 50% sample of data
dn.shape      # Half the rows of 'ad', same number of columns

In [None]:
## Task 1 Understand your numeric data
##         How is it distributed.

# Question 1: How is Age distributed?
# Question 2: How is DailyTimeSpentonSite distributed
# Question 3: How is AreaIncome distributed

In [None]:
# 7.1 Distribution of 'Age' as a histogram.
#     Author's reading: Age is slightly skewed to the right;
#     density of younger persons is high.
#     Try: kind = 'kde', col = 'Gender', kde = True

_ = sns.displot(
                 data = ad,
                 x = 'Age'
                )

In [None]:
# 7.1.1 Add more plot configurations
# Refer: https://matplotlib.org/api/axes_api.html#matplotlib-axes
# Note: 'ax' holds whatever sns.displot() returns; per the seaborn
#       documentation that is a FacetGrid, not a matplotlib Axes.
ax = sns.displot(data = ad, x = 'Age')

# 7.1.2 Axis limits, labels, title and tick positions in one call
_ = ax.set(
            xlim = (10,80),
            xlabel = "age of persons",
            ylabel = "counts",
            title = "Histogram of Age",
            xticks = list(range(0,80,5))
           )


In [None]:
# 7.1.3 Relationship of Age vs AreaIncome as a two-dimensional
#       histogram. Also try: kind = 'kde'

_ = sns.displot(
                 kind = 'hist',
                 x = 'Age',            # Continuous feature
                 y = 'AreaIncome',     # Continuous feature
                 data = ad
                )

In [None]:
# 7.1.4 How are clicks distributed as per Age:
#       one density curve of 'Age' per click value

_ = sns.displot(
                 data = ad,
                 kind = 'kde',
                 x = 'Age',
                 hue = 'Clicked_on_ad'
                )

In [None]:
# 7.1.5 Distribution of DailyTimeSpentonSite
#sns.displot(ad.DailyTimeSpentonSite)
#sns.displot(ad.AreaIncome)
#sns.displot(ad.DailyInternetUsage)

In [None]:
# 7.2 Using a for loop to plot all distributions at once
columns = ['Age', 'AreaIncome', 'DailyInternetUsage', 'DailyTimeSpentonSite']

# 7.2.1 A 2 X 2 grid of axes
fig, ax = plt.subplots(nrows = 2, ncols = 2, figsize = (10,5))

# 7.2.2 Make the grid one-dimensional so that it can be
#       walked along with the column names
ax = ax.flatten()

# 7.2.3 One histogram, with its density curve, per column
for axis, col_name in zip(ax, columns):
    _ = sns.histplot(ad[col_name], kde = True, ax = axis)


In [None]:
# 8.0 Relationship of numeric variable with a categorical variable
# Question 4: How is 'Age' related to clicking?
# Question 5: How is DailyInternetUsage related to clicking?
# Question 6: How is 'AreaIncome' related to clicking?

In [None]:
# 8.1 One demo plot of relationship of 'Age' with 'Clicked_on_ad':
#     one box of 'Age' per click value
#     https://seaborn.pydata.org/generated/seaborn.boxplot.html#seaborn.boxplot

_ = sns.boxplot(
                 data = ad,
                 x = 'Clicked_on_ad',       # Discrete
                 y = 'Age'                  # Continuous
                )

In [None]:
# 8.2 'DailyInternetUsage'  vs clicking
#     Boxes are grouped by 'Clicked_on_ad', as the heading says
#     (grouping by 'Gender' does not show the relation with clicking).
#     Try: notch = True : the notch marks an interval around the
#                         median (author's note: 95% chance median may
#                         vary from one end of notch to the other)

_=sns.boxplot(x = 'Clicked_on_ad',         # Discrete
              y = 'DailyInternetUsage',    # Continuous
              data = ad
              )

In [None]:
# 9.0 Relationship of numeric to numeric variables
#     Using jointplots:
#           While jointplots may not show any linear relationship,
#           they can show by use of contour plots, given X, probable
#           Y through high density areas.

In [None]:
# Question 7: Show joint distribution of DailyTimeSpentonSite and AreaIncome
# Question 8: Show joint distribution of DailyInternetUsage and DailyTimeSpentonSite
# Question 9: Show these plots as kernel density as also 'hex' as also
#             draw regression line
#
# A jointplot = Scatterplot + Density plots

In [None]:
# 9.1 Start with a plain jointplot: scatter of the two features
#     with the distribution of each along the margins
_ = sns.jointplot(
                   data = ad,
                   x = 'DailyTimeSpentonSite',
                   y = 'AreaIncome'
                  )

In [None]:
# 9.2 Same kind of plot, drawn as kernel densities.
#     Author's reading: clearly two clusters are evident here

_ = sns.jointplot(
                   kind = "kde",
                   x = 'DailyInternetUsage',
                   y = 'DailyTimeSpentonSite',
                   data = ad
                  )


In [None]:
# 9.3 Or plot the same two features as hexagonal bins
_ = sns.jointplot(
                   kind = "hex",
                   x = 'DailyInternetUsage',
                   y = 'DailyTimeSpentonSite',
                   data = ad
                  )

In [None]:
# 10.0 Relationship between two categorical and one numeric variable
#     Numeric variable has to be some summary measure. So, we have
#     to first calculate this summary measure
#
#     Matrix plots or heatmap
#    #########################

# Question 13: Hour and weekday wise when are clicks most
# Question 14: Quarter wise and weekday wise when are clicks most
# Question 15: Quarter wise and weekday wise when are DailyInternetUsage max and min

In [None]:
# 10.1 When are total clicks more
#      Table of hour vs weekday holding the SUM of clicks.
#      Row labels are parts of the day, column labels are weekdays;
#      these become the Y and X labels of the heatmap.
#
#      Without 'values' and 'aggfunc', crosstab() only counts rows
#      (ads shown), whether clicked or not. With them, each cell is
#      the sum of 'Clicked_on_ad' for that hour/weekday pair.
#      Pairs having no rows come out as NaN and are set to 0.

df_wh = pd.crosstab(
                    index   = ad['hourOfDay'],
                    columns = ad['weekday'],
                    values  = ad['Clicked_on_ad'],
                    aggfunc = 'sum'
                   ).fillna(0)
df_wh

In [None]:
# 10.2 Draw the heatmap quickly.
#      When a pandas DataFrame is given, its index and its
#      columns are used to label the rows and the columns
#      of the heatmap.
#
_ = sns.heatmap(data = df_wh)

In [None]:
# 10.2.1 Same heatmap with a ready-made colormap.
#        For the list of ready-made cmaps (plt.cm...), see:
#        https://matplotlib.org/tutorials/colors/colormaps.html
_ = sns.heatmap(
                 data = df_wh,
                 cmap = plt.cm.OrRd
                )
#_ = sns.heatmap(data = df_wh, cmap = plt.cm.GnBu)

In [None]:
############# I am done ###############

### Structure in data

In [None]:
########################
# Discover Structure in data
# Question: 8 Does data have any pattern to predict 'Clicked_on_ad'
########################

# 11.0 Select only numeric columns for the purpose.
#      'number' picks every numeric dtype. Listing only 'float64' and
#      'int64' would leave out columns stored with another integer
#      width (the .dt accessor may return int32 in recent pandas).
num_columns = ad.select_dtypes(include = 'number').copy()
num_columns.head()
num_columns.shape       # (1000, 8)


In [None]:
# 11.1 Normalize data
#      Columns that take part; 'Clicked_on_ad' is kept out
#      so that it does not get normalized
cols = [
        'DailyTimeSpentonSite', 'Age', 'AreaIncome',
        'DailyInternetUsage', 'Gender', 'AdTopicLineLength', 'month'
       ]

# 11.2 For every column: subtract its mean and
#      divide by its standard deviation
nc = ad.loc[:, cols].copy()
nc = nc.sub(nc.mean()).div(nc.std())
nc.head(3)

In [None]:
# 11.3 Add the (un-normalized) 'Clicked_on_ad' column
#      to the normalized dataframe
nc['Clicked_on_ad'] = ad.loc[:, 'Clicked_on_ad']


In [None]:
# 11.4 Parallel coordinates plot of the normalized data,
#      lines coloured by 'Clicked_on_ad'
_ = pd.plotting.parallel_coordinates(
                                      frame = nc,
                                      class_column = 'Clicked_on_ad',
                                      colormap = 'winter'
                                     )


In [None]:
# 11.5 Radviz plot of the normalized data,
#      points coloured by 'Clicked_on_ad'
_ = pd.plotting.radviz(
                        frame = nc,
                        class_column = 'Clicked_on_ad',
                        colormap = 'winter'
                       )



In [None]:
# 11.6 Andrews curves of the normalized data,
#      curves coloured by 'Clicked_on_ad'
_ = pd.plotting.andrews_curves(
                                frame = nc,
                                class_column = 'Clicked_on_ad',
                                colormap = 'winter'
                               )

In [None]:
# 11.7 What if data is random?
#      Build a frame of standard-normal noise of the same size as the
#      real data: as many rows as 'ad' and one column per name in 'cols'.
#      The generator is seeded so that the plots below are the same
#      on every run.
rng = np.random.default_rng(42)
rand = pd.DataFrame(
                     rng.standard_normal((len(ad), len(cols))),
                     columns = cols    # Assign column names, just like that
                    )

# 11.7.1 Add the real target column also.
#        Values are copied by position, whatever the index of 'ad' is.
rand['Clicked_on_ad'] = ad['Clicked_on_ad'].to_numpy()


In [None]:
# 11.8 Now start plotting: parallel coordinates of the random data,
#      lines coloured by 'Clicked_on_ad'
_ = pd.plotting.parallel_coordinates(
                                      frame = rand,
                                      class_column = 'Clicked_on_ad',
                                      colormap = 'winter'
                                     )


In [None]:
# 11.9 Andrews curves of the random data,
#      curves coloured by 'Clicked_on_ad'
_=pd.plotting.andrews_curves(
                             rand,
                             'Clicked_on_ad',
                             colormap = 'winter'
                             )

In [None]:
##################### End of structured plots ################

In [None]:
# 11.0 Faceted plots: Show facets of relationships between
#      variables, conditioned on one or more categorical variables
#      Facet plots
#      READ 'catplot' AS CONDITIONAL PLOTS


In [None]:
# 11.1 Box plots of 'DailyInternetUsage' by 'Gender', conditioned on
#      age category (one row of panels each) and on area-income
#      category (one column of panels each).
#      No 'estimator' is passed: a box plot draws the distribution
#      itself, and catplot uses an estimator only for kinds that
#      summarise data, such as 'bar' and 'point'.
_=sns.catplot(x = 'Gender',
              y = 'DailyInternetUsage',
              row = 'age_cat' ,
              col = 'area_income_cat',
              kind = 'box',
              data = ad
             )

In [None]:
# 11.2 Faceted scatter plots or relationship plots:
#      'Age' vs 'DailyInternetUsage', one row of panels per
#      area-income category and one column of panels per weekday
_ = sns.relplot(
                 data = ad,
                 kind = 'scatter',
                 x = 'Age',
                 y = 'DailyInternetUsage',
                 row = 'area_income_cat',
                 col = 'weekday'
                )


In [None]:
# 11.3 Single scatter plot of 'Age' vs 'DailyInternetUsage',
#      points coloured by area-income category.
#      Colours of 'hue' levels are chosen with 'palette';
#      'cmap' is not an argument of relplot.
#      The result is assigned to '_' like the other plots, so that
#      the returned object is not echoed.
_ = sns.relplot(x = 'Age',
                y = 'DailyInternetUsage',
                hue = 'area_income_cat',
                kind = 'scatter',
                data = ad,
                palette = 'winter')

In [None]:
#############