In [1]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import math

In [2]:
df = pd.read_excel("./Data/sub-variant.xlsx")
df.head()

Unnamed: 0,Sample,Probability of New Variant (NV) from the testing laboratory,Actual Class
0,1,0.340147,O
1,2,0.768768,NV
2,3,0.927277,NV
3,4,0.328154,NV
4,5,0.283847,O


In [3]:
df["Actual Class"].unique()

array(['O', 'NV'], dtype=object)

In [4]:
df['Actual Class'] = df['Actual Class'].map({'O': 0, 'NV': 1})
df.rename(columns = {'Probability of New Variant (NV) from the testing laboratory':'propensity'}, inplace = True)
df = df.sort_values(by = "propensity",axis=0, ascending=False)
df.head()

Unnamed: 0,Sample,propensity,Actual Class
33,34,0.989342,1
2,3,0.927277,1
19,20,0.911406,0
30,31,0.896724,1
26,27,0.875694,0


In [5]:
def Classification_Metrics(df,p):
    df1 = df[df["propensity"]>p]
    df2 = df[df["propensity"]<p]
    n11 = df1[df1["Actual Class"]==1]["Actual Class"].count()
    #print("n11 ", n11)
    n21 = df1[df1["Actual Class"]==0]["Actual Class"].count()
    #print("n21 ", n21)
    n22 = df2[df2["Actual Class"]==0]["Actual Class"].count()
    #print("n22 ", n22)
    n12 = df2[df2["Actual Class"]==1]["Actual Class"].count()
    #print("n12 ", n12)
    F1 = 2*n11/(2*n11+n21+n12)
    MCC = MCC_Cal(n11,n12,n21,n22)
    #MCC = ((n11*n22)-(n21*n12))/math.sqrt((n11+n21)*(n11+n12)*(n21+n22)*(n12+n22))
    #print("F1 ", F1)
    #print("MCC ", MCC)
    sensitivity = n11/(n11+n12)
    specificity = 1 - (n22/(n21+n22))
    return sensitivity,specificity,F1,MCC

In [6]:
def MCC_Cal(TP,FN,FP,TN):
    if TP == 0 and FP == 0:
        if FN == 0 and TN != 0:
            MCC = 1
        elif FN != 0 and TN != 0:
            MCC = 0
        else:
            MCC = -1
    elif TP == 0 and FN == 0:
        if FP == 0 and TN != 0:
            MCC = 1
        elif FP != 0 and TN != 0:
            MCC = 0
        else:
            MCC = -1
    elif TN == 0 and FP == 0:
        if TP != 0 and FN == 0:
            MCC = 1
        elif TP != 0 and FN != 0:
            MCC = 0
        else:
            MCC = -1
    elif TN == 0 and FN == 0:
        if TP != 0 and FP == 0:
            MCC = 1
        elif TP != 0 and FP != 0:
            MCC = 0
        else:
            MCC = -1
    else:
        MCC = ((TP*TN)-(FP*FN))/math.sqrt((TP+FP)*(TP+FN)*(FP+TN)*(FN+TN))
    return MCC
        

In [7]:
pro=[0,0.2,0.4,0.5,0.6,0.8,1]
sensitivity = []
specificity = []
F1_Score = []
MCC = []
for p in pro:
    a,b,c,d = Classification_Metrics(df,p)
    sensitivity.append(a)
    specificity.append(b) 
    F1_Score.append(c)
    MCC.append(d)
table = pd.DataFrame({"Cutoff":pro, "Sensitivity":sensitivity, "1 - Specificity":specificity, "F1_Score":F1_Score, "MCC":MCC})
table

Unnamed: 0,Cutoff,Sensitivity,1 - Specificity,F1_Score,MCC
0,0.0,1.0,1.0,0.612245,0.0
1,0.2,1.0,0.736842,0.681818,0.368939
2,0.4,0.866667,0.315789,0.764706,0.550877
3,0.5,0.733333,0.210526,0.733333,0.522807
4,0.6,0.666667,0.210526,0.689655,0.460195
5,0.8,0.4,0.105263,0.521739,0.345005
6,1.0,0.0,0.0,0.0,0.0


In [8]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=1, cols=1)

fig.add_trace(
    go.Scatter(x=[0, 1], y=[0,1]),
    row=1, col=1
)

fig.add_trace(
    go.Scatter(x=specificity, y=sensitivity),
    row=1, col=1
)

fig.update_layout(height=600, width=800, title_text='ROC Curve',xaxis_title="1 - Specificity",yaxis_title="Sensitivity")
fig.show()


#fig = px.line(df, x=specificity, y=sensitivity, title='ROC Curve')
#fig.show()

a) From the graph we take Sensitivity = 0.87 and Specificity = 0.74 for an optimal cutoff is 0.4
b) MCC = 0.550877 and F1_Score = 0.764706 for a recommednded balanced cutoff = 0.4 
MCC = 0.522807 and F1_Score = 0.733333 for a defaut cutoff = 0.5

MCC and F1_Score improved for the recommended cutoff than that of default cutoff which is a positive light on model perfomance and usability

c) In this scenario, the goal is to assess the classifier's ability to distinguish between Omicron and the new variant. Since we are interested in identifying new variant cases with high sensitivity, we require a measure that prioritizes sensitivity or recall, such as F-score. As a result, using F-score to evaluate the classifier's performance is recommended.

### 2

In [9]:
n11 = 38950
n12 = 1050
n21 = 1500
n22 = 58500
Accuracy = (n11+n22)/(n11+n12+n21+n22)
Error_rate = 1-Accuracy
sensitivity = n11/(n11+n12)
specificity = 1 - (n22/(n21+n22))
print("Error_rate ",Error_rate)
print("Sensitivity ",sensitivity)
print("Specificity ",specificity)

Error_rate  0.025499999999999967
Sensitivity  0.97375
Specificity  0.025000000000000022


In [10]:
n11 = 2500
n12 = 1250
n21 = 1275
n22 = 4975
Accuracy = (n11+n22)/(n11+n12+n21+n22)
Error_rate = 1-Accuracy
sensitivity = n11/(n11+n12)
specificity = 1 - (n22/(n21+n22))
print("Error_rate ",Error_rate)
print("Sensitivity ",sensitivity)
print("Specificity ",specificity)
#overfitting

Error_rate  0.25249999999999995
Sensitivity  0.6666666666666666
Specificity  0.20399999999999996


There appears to have a overfitting model on validation datasetwhich can be seen from the data,

On the training set, the model achieved a very low error rate of 0.0255, indicating that it was able to accurately classify a large majority of the cases in the dataset. The sensitivity, which measures the proportion of true positive cases that were correctly identified, was very high at 0.97375, indicating that the model was able to correctly identify almost all of the Diabetic retinopathy cases in the training set. However, the specificity, which measures the proportion of true negative cases that were correctly identified, was very low at 0.025, indicating that the model incorrectly identified a large proportion of normal cases as Diabetic retinopathy cases.

On the validation set, however, the model performed much worse. The error rate was much higher at 0.2525, indicating that the model incorrectly classified a large proportion of cases in the validation set. The sensitivity was still moderate at 0.6667, indicating that the model was able to correctly identify a reasonable proportion of the Diabetic retinopathy cases in the validation set. However, the specificity was still quite low at 0.204, indicating that the model incorrectly identified a large proportion of normal cases as Diabetic retinopathy cases in the validation set.


### 4

In [13]:
bc = pd.read_excel("./Data/Bostonconsultancy.xlsx")
bc.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,Probability of winning the opportunity,Actual Class
0,1,Walmart,Europe,AWS Redshift,455 TB,AWS Quicksight,610 $,0.397,0
1,2,Exxon Mobil,Europe,Azure Synapse,233 TB,Microsoft Power BI,888 $,0.176,0
2,3,Apple,Europe,AWS Redshift,859 TB,AWS Quicksight,682 $,0.416,0
3,4,Berkshire Hathaway,India,Azure Synapse,66 TB,Microsoft Power BI,710 $,0.385,0
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1


In [14]:
bc.rename(columns = {'Probability of winning the opportunity':'propensity'}, inplace = True)
bc = bc.sort_values(by = "propensity",axis=0, ascending=False)
bc.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618 $,0.996,1
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490 $,0.988,0
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948 $,0.984,1
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702 $,0.974,0


In [15]:
bc["Gain, C(k)"] = bc["Actual Class"].cumsum()
bc.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class,"Gain, C(k)"
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1,1
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618 $,0.996,1,2
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490 $,0.988,0,2
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948 $,0.984,1,3
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702 $,0.974,0,3


In [16]:
bc.shape[0]

72

In [17]:
lst = list(range(1,bc["Actual Class"].count()+1))
bc["Case, K"] = lst
bc.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class,"Gain, C(k)","Case, K"
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1,1,1
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618 $,0.996,1,2,2
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490 $,0.988,0,2,3
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948 $,0.984,1,3,4
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702 $,0.974,0,3,5


In [18]:
y = bc["Actual Class"].sum()/bc["Actual Class"].count()
bc["Baseline, A(k)"] = y*bc["Case, K"]
bc.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class,"Gain, C(k)","Case, K","Baseline, A(k)"
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1,1,1,0.472222
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618 $,0.996,1,2,2,0.944444
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490 $,0.988,0,2,3,1.416667
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948 $,0.984,1,3,4,1.888889
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702 $,0.974,0,3,5,2.361111


In [19]:
bc["Lift, L(k)"] = bc["Gain, C(k)"]/bc["Baseline, A(k)"]
bc["Baseline, B(k)"] = bc["Baseline, A(k)"]/bc["Baseline, A(k)"]
bc.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class,"Gain, C(k)","Case, K","Baseline, A(k)","Lift, L(k)","Baseline, B(k)"
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1,1,1,0.472222,2.117647,1.0
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618 $,0.996,1,2,2,0.944444,2.117647,1.0
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490 $,0.988,0,2,3,1.416667,1.411765,1.0
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948 $,0.984,1,3,4,1.888889,1.588235,1.0
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702 $,0.974,0,3,5,2.361111,1.270588,1.0


In [20]:
bc["Service Cost"] = bc["Service Cost"].apply(lambda x: x.replace("$", ""))
bc["Service Cost"] = bc["Service Cost"].astype('int64')
bc.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class,"Gain, C(k)","Case, K","Baseline, A(k)","Lift, L(k)","Baseline, B(k)"
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500,0.998,1,1,1,0.472222,2.117647,1.0
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618,0.996,1,2,2,0.944444,2.117647,1.0
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490,0.988,0,2,3,1.416667,1.411765,1.0
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948,0.984,1,3,4,1.888889,1.588235,1.0
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702,0.974,0,3,5,2.361111,1.270588,1.0


In [21]:
Cost = 60+75+100+50+50
#bc["Net Profit"] = (bc["Service Cost"] - Cost).cumsum()
bc["Net Profit"] = np.where(bc["Actual Class"]==1, (bc["Service Cost"] - Cost), -Cost).cumsum()
bc.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class,"Gain, C(k)","Case, K","Baseline, A(k)","Lift, L(k)","Baseline, B(k)",Net Profit
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500,0.998,1,1,1,0.472222,2.117647,1.0,1165
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618,0.996,1,2,2,0.944444,2.117647,1.0,1448
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490,0.988,0,2,3,1.416667,1.411765,1.0,1113
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948,0.984,1,3,4,1.888889,1.588235,1.0,1726
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702,0.974,0,3,5,2.361111,1.270588,1.0,1391


In [22]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=1, cols=1)

fig.add_trace(
    go.Scatter(mode="lines+markers",x=bc["Case, K"], y=bc["Baseline, A(k)"]),
    row=1, col=1
)

fig.add_trace(
    go.Scatter(mode="lines+markers",x=bc["Case, K"], y=bc["Gain, C(k)"]),
    row=1, col=1
)


fig.update_layout(height=600, width=800, title_text='Gain Chart',xaxis_title="Case #",yaxis_title="Cum Gain")
fig.show()


#fig = px.line(df, x=specificity, y=sensitivity, title='ROC Curve')
#fig.show()

In [23]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=1, cols=1)

fig.add_trace(
    go.Scatter(mode="lines+markers",x=bc["Case, K"], y=bc["Net Profit"]),
    row=1, col=1
)

fig.update_layout(height=600, width=800, title_text='Net Profit',xaxis_title="Case #",yaxis_title="Cum Net Profit")
fig.show()


#fig = px.line(df, x=specificity, y=sensitivity, title='ROC Curve')
#fig.show()

In [24]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=1, cols=1)

fig.add_trace(
    go.Scatter(mode="lines+markers",x=bc["Case, K"], y=bc["Baseline, B(k)"]),
    row=1, col=1
)

fig.add_trace(
    go.Scatter(mode="lines+markers",x=bc["Case, K"], y=bc["Lift, L(k)"]),
    row=1, col=1
)

fig.update_layout(height=600, width=800, title_text='Lift Chart',xaxis_title="Case #",yaxis_title="Lift")
fig.show()


#fig = px.line(df, x=specificity, y=sensitivity, title='ROC Curve')
#fig.show()

In [26]:
bc1 = pd.read_excel("./Data/Bostonconsultancy.xlsx")
bc1.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,Probability of winning the opportunity,Actual Class
0,1,Walmart,Europe,AWS Redshift,455 TB,AWS Quicksight,610 $,0.397,0
1,2,Exxon Mobil,Europe,Azure Synapse,233 TB,Microsoft Power BI,888 $,0.176,0
2,3,Apple,Europe,AWS Redshift,859 TB,AWS Quicksight,682 $,0.416,0
3,4,Berkshire Hathaway,India,Azure Synapse,66 TB,Microsoft Power BI,710 $,0.385,0
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1


In [27]:
bc1.rename(columns = {'Probability of winning the opportunity':'propensity'}, inplace = True)
bc1 = bc1.sort_values(by = "propensity",axis=0, ascending=False)
bc1.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618 $,0.996,1
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490 $,0.988,0
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948 $,0.984,1
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702 $,0.974,0


In [28]:
bc1.shape

(72, 9)

In [29]:
bc1.drop(bc1.tail(bc1.shape[0]%10).index,inplace=True)
bc1.shape

(70, 9)

In [30]:
bc1.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618 $,0.996,1
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490 $,0.988,0
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948 $,0.984,1
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702 $,0.974,0


As we are allowed to do 10 discovery calls per month 
We have divided the dataset with 10 Companies per group instead of having 7 records per group(70/10) which is usually the case in generic Decile Chart.

In [31]:
bc1['Decile'] = pd.qcut(bc1['propensity'], 7, labels=[i for i in range(7,0,-1)])
bc1

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class,Decile
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500 $,0.998,1,1
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618 $,0.996,1,1
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490 $,0.988,0,1
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948 $,0.984,1,1
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702 $,0.974,0,1
...,...,...,...,...,...,...,...,...,...,...
64,65,Cigna,Europe,Azure Synapse,37 TB,Microsoft Power BI,1000 $,0.094,0,7
61,62,Goldman Sachs Group,Australia,Azure Synapse,103 TB,Microsoft Power BI,518 $,0.092,0,7
19,20,Kroger,Europe,Azure Synapse,529 TB,Microsoft Power BI,1000 $,0.084,0,7
41,42,Lowe's,Australia,Azure Synapse,845 TB,Microsoft Power BI,578 $,0.065,0,7


In [32]:
bc1["Service Cost"] = bc1["Service Cost"].apply(lambda x: x.replace("$", ""))
bc1.head()

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class,Decile
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500,0.998,1,1
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618,0.996,1,1
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490,0.988,0,1
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948,0.984,1,1
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702,0.974,0,1


In [33]:
bc1["Service Cost"] = bc1["Service Cost"].astype('int64')

In [34]:
bc2=pd.DataFrame(bc1.groupby(["Decile"])["Actual Class"].mean())
bc2 = bc2.sort_index(ascending=False)
bc2.rename(columns={"Actual Class":"Dd"},inplace=True)
bc2.head()

Unnamed: 0_level_0,Dd
Decile,Unnamed: 1_level_1
1,0.7
2,0.6
3,0.6
4,0.5
5,0.6


In [35]:
bc2["y1"]=bc1["Actual Class"].mean()
bc2

Unnamed: 0_level_0,Dd,y1
Decile,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.7,0.485714
2,0.6,0.485714
3,0.6,0.485714
4,0.5,0.485714
5,0.6,0.485714
6,0.3,0.485714
7,0.1,0.485714


In [36]:
bc2["Ld"] = bc2["Dd"]/bc2["y1"]
bc2

Unnamed: 0_level_0,Dd,y1,Ld
Decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.7,0.485714,1.441176
2,0.6,0.485714,1.235294
3,0.6,0.485714,1.235294
4,0.5,0.485714,1.029412
5,0.6,0.485714,1.235294
6,0.3,0.485714,0.617647
7,0.1,0.485714,0.205882


In [37]:
fig = px.bar(bc2, x=bc2.index, y='Ld', color='Ld')
fig.add_hline(y=1)
fig.update_layout(height=600, width=800, title_text='Decile Chart')
fig.show()

In [38]:
bc1[bc1["Decile"]==1]

Unnamed: 0,Sn,Companies Name,Region,Data Warehouse,Cloud data storage size,BI Tool,Service Cost,propensity,Actual Class,Decile
4,5,Amazon.com,USA,GCP Big Query,338 TB,Looker,1500,0.998,1,1
69,70,Charter Communications,USA,GCP Big Query,170 TB,Looker,618,0.996,1,1
65,66,AIG,USA,GCP Big Query,858 TB,Looker,490,0.988,0,1
43,44,MetLife,USA,GCP Big Query,853 TB,Looker,948,0.984,1,1
29,30,Citigroup,USA,GCP Big Query,749 TB,Looker,702,0.974,0,1
47,48,PepsiCo,USA,GCP Big Query,332 TB,Looker,827,0.963,1,1
24,25,Bank of America,USA,GCP Big Query,224 TB,Looker,1000,0.963,1,1
62,63,Morgan Stanley,USA,GCP Big Query,38 TB,Looker,644,0.949,0,1
11,12,Ford Motor,USA,GCP Big Query,532 TB,Looker,953,0.913,1,1
26,27,Home Depot,USA,GCP Big Query,463 TB,Data Studio,525,0.91,1,1


From this we can see that the clients they are to reach out to following clients
- Amazon.com
- Charter Communications
- AIG
- MetLife
- Citigroup
- PepsiCo
- Bank of America
- Morgan Stanley	
- Ford Motor	
- Home Depot
because by following the recommendation we get lift of 1.44

c) The top recommended company by the model belongs to USA and using GCP Big Query and Looker BI Tool majorly and with an average cloud data storage size of more than 500TB