In [2]:
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA


# 1

In [3]:
data = pd.read_csv('heart.csv')

columns = ['trtbps', 'chol', 'thalachh', 'oldpeak', 'lable']

fig = px.scatter_matrix(data, dimensions=['trtbps', 'chol', 'thalachh', 'oldpeak'],
                        color='lable', height=600, width=800,
                        color_continuous_scale=px.colors.sequential.Plasma)
fig.show()

In [4]:
columns = ['age', 'sex', 'chol', 'thalachh', 'trtbps', 'oldpeak']

fig = px.scatter_matrix(data, dimensions=columns,
                        color='lable', height=700, width=900,
                        color_continuous_scale=px.colors.sequential.Plasma)

# Hiển thị biểu đồ
fig.show()

In [5]:
fig = px.parallel_coordinates(data,
                              dimensions=['trtbps', 'chol', 'thalachh', 'oldpeak','lable'],
                              color='lable',
                              color_continuous_scale=px.colors.sequential.Viridis, color_continuous_midpoint=1.0)

fig.show()

In [6]:
columns = ['chol', 'thalachh', 'trtbps','age','sex']
fig = px.parallel_coordinates(data,
                              dimensions=columns,
                              color='sex',
                              color_continuous_scale=px.colors.sequential.Plasma, color_continuous_midpoint=1.0)

fig.show()

In [7]:
dataCopy = data.copy()
scaler = StandardScaler()
x_scaled = scaler.fit_transform(dataCopy)

In [8]:
pca = PCA(n_components = 2)
pca_result = pca.fit_transform(x_scaled)
pca_df = pd.DataFrame(data = pca_result, columns = ['PC1','PC2'])
pca_df['lable'] = dataCopy['lable']

fig = px.scatter(pca_df, x = 'PC1',y = 'PC2', color = 'lable',
                 title = 'PCA of heart.csv')
fig.show()

In [9]:
pca = PCA(n_components = 3)
pca_result = pca.fit_transform(x_scaled)
pca_df = pd.DataFrame(data = pca_result, columns = ['PC1','PC2','PC3'])
pca_df['lable'] = dataCopy['lable']

px.scatter_3d(pca_df, x= 'PC1',y = 'PC2',z = 'PC3', color = 'lable',
              title = 'PCA of heart.csv')

In [10]:
tsne = TSNE(n_components=2, random_state = 0)
tsne_result = tsne.fit_transform(dataCopy)
tsne_df = pd.DataFrame(data = tsne_result, columns = ['tsne1','tsne2'])

tsne_df['lable'] = dataCopy['lable']
fig = px.scatter(tsne_df, x= 'tsne1', y = 'tsne2', color = 'lable')
fig.show()

In [11]:
tsne = TSNE(n_components=3, random_state = 0)
tsne_result = tsne.fit_transform(dataCopy)
tsne_df = pd.DataFrame(data = tsne_result, columns = ['tsne1','tsne2','tsne3'])

tsne_df['lable'] = dataCopy['lable']
fig = px.scatter_3d(tsne_df, x= 'tsne1', y = 'tsne2', z = 'tsne3',color = 'lable')
fig.show()

# 2

In [12]:
data2 = pd.read_csv('telecom_churn.csv')

In [13]:
data21 = data2.groupby(by = 'Customer service calls').sum().reset_index()
data21.head()

Unnamed: 0,Customer service calls,State,Account length,Area code,International plan,Voice mail plan,Number vmail messages,Total day minutes,Total day calls,Total day charge,Total eve minutes,Total eve calls,Total eve charge,Total night minutes,Total night calls,Total night charge,Total intl minutes,Total intl calls,Total intl charge,Churn
0,0,NJALMOWVRIFLAZVAWYMOOKAKMDWYMNMNHIDCVTLAMTMAVT...,70608,304528,NoYesYesYesNoNoNoNoNoNoNoNoYesYesYesNoNoNoNoNo...,NoNoNoYesNoNoNoNoYesNoYesYesYesNoNoYesNoNoYesN...,5676,126739.3,70327,21546.13,140107.7,69770,11909.35,140946.9,69354,6342.71,7239.9,3098,1955.11,92
1,1,KSOHLAIAIDVATXHINHAZMANJWIWYININCOOKIDUTNVMNMD...,120202,514039,NoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNo...,YesYesNoNoYesYesNoNoNoNoNoNoNoYesNoNoNoNoNoNoN...,9960,211936.3,119343,36029.75,239056.2,118254,20319.96,236393.6,118679,10637.73,11986.1,5365,3236.91,122
2,2,OHSCILARORDEVAUTCAMNNCWAMNTXKYIDMIMAMOIDNVTXNV...,75312,331765,YesNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoNoN...,NoNoNoNoYesNoNoNoYesYesNoYesNoNoYesNoYesNoYesN...,6293,134156.1,75342,22807.02,152042.4,76194,12923.8,152518.0,76987,6863.37,7800.5,3397,2106.54,87
3,3,OKMAMTVTNEMTLAGAAKGAIDMIIACOWISDNJNMORCOMEAZUT...,43515,189797,YesNoNoNoNoNoNoNoYesNoNoNoNoNoNoNoNoNoYesNoNoN...,NoYesNoNoNoNoNoYesYesNoNoNoNoYesNoYesNoNoNoYes...,2928,78642.8,42864,13369.41,84952.2,42390,7221.03,86780.0,42472,3905.12,4348.8,1932,1174.33,44
4,4,INIANYTXNYINAZIAAZALMSMSTXWVMEVAAZORNJFLRIMEIN...,17042,72878,NoNoNoNoNoNoNoNoNoNoNoNoNoYesYesNoNoNoNoNoNoNo...,NoNoNoNoNoNoNoYesNoNoNoYesNoYesYesYesNoYesNoYe...,1245,30589.2,16895,5200.25,33543.7,16948,2851.21,33050.7,16130,1487.31,1697.0,714,458.31,76


In [15]:
fig = go.Figure()
fig.add_trace(go.Line(x=data21['Customer service calls'], y = data21['Total day minutes']))
fig.add_trace(go.Line(x=data21['Customer service calls'], y = data21['Total night minutes']))
fig.add_trace(go.Line(x=data21['Customer service calls'], y = data21['Total eve minutes']))
fig.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [16]:
dim = ['Total day minutes','Total eve minutes','Total night minutes','Total intl minutes']
px.scatter_matrix(data2, dimensions = dim, color = 'Customer service calls')

In [17]:
dim1 = ['Total day minutes','Total eve minutes','Total night minutes','Total intl minutes','Churn']
fig = px.parallel_coordinates(data21,
        dimensions = dim1,
        color = 'Customer service calls',
        color_continuous_scale= px.colors.sequential.Viridis)
fig.update_layout(width = 1000,
                 height = 600)
fig.show()

In [18]:
data2['International plan'] = data2['International plan'].map({'Yes':1,'No':0})
data2['Voice mail plan'] = data2['Voice mail plan'].map({'Yes':1, 'No':0})

data2 = data2.drop(columns = ['State'])
x = data2.drop(['Churn'], axis = 1)
scalar = StandardScaler()
x_scaled = scalar.fit_transform(x)

In [20]:
pca = PCA(n_components = 2)
pca_result = pca.fit_transform(x_scaled)
pca_df = pd.DataFrame(data = pca_result, columns = ['PC1','PC2'])
pca_df['Churn'] = data2['Churn']

fig = px.scatter(pca_df, x = 'PC1',y = 'PC2', color = 'Churn',
                 title = 'PCA của bộ data heart.csv')
fig.show()

In [21]:
tsne = TSNE(n_components = 2, random_state = 42)
tsne_result = tsne.fit_transform(x_scaled)

tsne_df = pd.DataFrame(data = tsne_result, columns = ['tsne1','tsne2'])
tsne_df['Churn'] = data2['Churn']
fig = px.scatter(tsne_df,x = 'tsne1', y = 'tsne2', color = 'Churn')
fig.show()


# 3

In [22]:
data3= pd.read_csv('mnist_train.csv')

In [23]:
x = data3.drop(['label'], axis = 1)

In [24]:
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

In [26]:
pca = PCA(n_components = 2)
pca_result = pca.fit_transform(x_scaled)
pca_df = pd.DataFrame(data = pca_result, columns = ['PC1','PC2'])
pca_df['label'] = data3['label']

fig = px.scatter(pca_df, x = 'PC1',y = 'PC2',
                 color = 'label',
                 title = 'PCA of mnist_train.csv')
fig.show()