In [1]:
import pandas as pd
#import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the diamonds CSV (relative path, so the file must sit in the kernel's
# working directory) and keep only the seven columns analysed below.
KEPT_COLUMNS = ['carat','cut','color','clarity','depth','table','price']
df = pd.read_csv('diamonds.csv').loc[:, KEPT_COLUMNS]
df

FileNotFoundError: [Errno 2] No such file or directory: 'diamonds.csv'

In [None]:
# Draw a fixed-size random subset of `df`; the explicit seed makes the same
# rows come back on every run.
SAMPLE_SIZE = 300
SAMPLE_SEED = 7
df_samplePick = df.sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)
df_samplePick

---

## 1. Univariate Analysis

### 1.1 Categorical Data (Qualitative)

#### 1.1.1 Counting Each Category in the Dataset Without a Graph

In [None]:
# Number of rows per distinct 'cut' value, most frequent first.
df.loc[:, 'cut'].value_counts()

In [None]:
# Number of rows per distinct 'color' value, most frequent first.
df.loc[:, 'color'].value_counts()

#### 1.1.2 Pie Chart

In [None]:
# Per-cut row counts within the sample, ordered by cut label so the wedge
# order does not depend on which cut happens to be most frequent.
cut_counts = df_samplePick['cut'].value_counts().sort_index()

# Seaborn's 'pastel' palette supplies the wedge colours.
colors = sns.color_palette('pastel')

# Pie chart: one wedge per cut, annotated with its percentage (one decimal).
plt.title('Diamond Cuts Value Composition\n')
plt.pie(
    cut_counts,
    labels=cut_counts.index,
    colors=colors,
    autopct='%1.1f%%',
)
plt.show()

In [None]:
# Show the `colors` palette (created for the pie chart) as this cell's output.
colors

#### 1.1.3 Bar Chart

In [None]:
fig, ax = plt.subplots(figsize=(14,6))

# Count once and reuse; value_counts() orders cuts from most to least frequent.
cut_counts = df['cut'].value_counts()
x = cut_counts.index
y = cut_counts.values

# zorder=2 keeps the bars drawn above the grid lines.
ax.bar(x, y, zorder=2)

# Write each frequency in white just below the top of its bar.
# Categorical bars sit at x = 0, 1, 2, ..., so the enumerate index is the bar
# centre; ha='center' centres the text on it (the default left alignment would
# start the text at the centre and push it towards the bar's right edge).
for i, freq in enumerate(y):
    ax.text(x=i, y=freq-1500, s=freq, ha='center',
            color='white', weight='bold', size=14)

ax.set_title('Diamond Cuts Value Counts\n', weight='bold', size=14)
ax.set_xlabel('\nCut', weight='bold')
ax.set_ylabel('Frequency\n', weight='bold')

ax.grid(zorder=0)
plt.show()

### 1.2 Numerical Data (Quantitative)

In [None]:
# Display `df` again as a reference for the numeric columns plotted next.
df

In [None]:
fig1, ax1 = plt.subplots(figsize=(14,6))

# Histogram of 'table' in 15 bins. log=True puts the count axis on a log
# scale so sparsely populated bins remain visible next to the dominant ones.
ax1.hist(x=df['table'], bins=15, log=True)

# Label the figure so it can be read on its own; the y label states the log
# scale explicitly because bar heights are not linearly comparable.
ax1.set_title('Diamond Table Distribution\n', weight='bold', size=15)
ax1.set_xlabel('\nTable', weight='bold', size=13)
ax1.set_ylabel('Frequency (log scale)\n', weight='bold', size=13)

plt.show()

In [None]:
fig2, ax2 = plt.subplots(figsize=(14,6))

# One horizontal segment per row of `df`: its height is that row's 'table'
# value, and every segment runs from the smallest to the largest 'table' value.
# NOTE(review): all segments share the same horizontal extent, so only the set
# of heights carries information — confirm this is the intended view.
table_values = df['table']
table_low = table_values.min()
table_high = table_values.max()
ax2.hlines(y=table_values, xmin=table_low, xmax=table_high)

plt.show()

In [None]:
fig3, ax3 = plt.subplots(figsize=(14,6))

# Horizontal box plot of the 'table' column, drawn on the axes created above.
sns.boxplot(data=df, x='table', ax=ax3)

plt.show()

---
## 2. Bivariate Analysis

In [None]:
# Display the sampled frame used by the scatter plot in this section.
df_samplePick

### 2.1 Scatter Plot

In [None]:
fig4, ax4 = plt.subplots(figsize=(14,6))

# Scatter of the sampled rows: 'table' on x, 'price' on y.
# zorder=2 draws the points above the grid lines.
df_samplePick.plot(kind='scatter', x='table', y='price', ax=ax4, zorder=2)

ax4.set_title('Diamond Table vs Price\n', fontweight='bold', fontsize=15)
ax4.set_xlabel('\nTable', fontweight='bold', fontsize=13)
ax4.set_ylabel('Price\n', fontweight='bold', fontsize=13)

ax4.grid()

plt.show()

### 2.2 Hex Plot

In [None]:
fig5, ax5 = plt.subplots(figsize=(14,6))

# Hexagonal binning of the full frame ('table' vs 'price'); gridsize=15 sets
# the number of hexagons along the x direction.
df.plot(kind='hexbin', x='table', y='price', gridsize=15, ax=ax5)

ax5.set_title('Diamond Table vs Price\n', fontweight='bold', fontsize=15)
ax5.set_xlabel('\nTable', fontweight='bold', fontsize=13)
ax5.set_ylabel('Price\n', fontweight='bold', fontsize=13)

plt.show()

---
## 3. Multivariate Analysis

In [None]:
# Display the sampled frame used by the pair plot in this section.
df_samplePick

### 3.1 Pair Plot

In [None]:
# Pairwise plots of the sampled rows, coloured by 'clarity'.
# corner=True draws only the lower triangle of the grid.
sns.pairplot(
    data=df_samplePick,
    hue='clarity',
    corner=True,
)
plt.show()

### 3.2 Scatter Plot

In [None]:
fig5, ax5 = plt.subplots(figsize=(18,14))

# Fixed colour for every cut value
colorDict = {
    'Fair': '#ff5252',
    'Good': '#ff793f',
    'Very Good': '#ffb142',
    'Premium': '#706fd3',
    'Ideal': '#33d9b2',
}

# Single scatter call over the whole frame: position = (carat, depth),
# marker size = price / 100, colour looked up from each row's cut.
point_colors = df['cut'].map(colorDict)
point_sizes = df['price']/100
ax5.scatter(x=df['carat'], y=df['depth'], s=point_sizes, c=point_colors,
            alpha=0.5, zorder=2)
ax5.set_ylim(50,75)

ax5.set_title('Diamond\n', fontweight='bold', fontsize=16)
ax5.set_xlabel('\nCarat', fontweight='bold', fontsize=13)
ax5.set_ylabel('Depth\n', fontweight='bold', fontsize=13)

# The scatter call carries no per-cut label, so build one round proxy marker
# per colour to act as the legend handles.
markers = []
for hex_color in colorDict.values():
    markers.append(plt.Line2D([0,0], [0,0], color=hex_color, marker='o', linestyle=''))

# Legend: proxy markers paired with the cut names, in dictionary order
ax5.legend(markers, colorDict.keys(), prop={'size':13},
           title='Cut\n', title_fontsize=14)

ax5.grid()

plt.show()

In [None]:
fig5, ax5 = plt.subplots(figsize=(18,14))

# Explicit cut -> colour mapping, so each cut keeps the same colour regardless
# of the order in which the cuts first appear in the data.
colorByCut = {'Fair':'#ff5252', 'Good':'#ff793f', 'Very Good':'#ffb142',
              'Premium':'#706fd3', 'Ideal':'#33d9b2'}
fallbackColor = '#808080'  # neutral grey for any cut value not listed above

# Plot the data: one scatter call per cut, so each cut gets its own legend
# entry through `label`.
for cut in df['cut'].unique():
    cutRows = df[df['cut']==cut]  # filter once, reuse for x, y and size
    ax5.scatter(x=cutRows['carat'],
                y=cutRows['depth'],
                s=cutRows['price']/100,  # marker size scales with price
                label=cut, color=colorByCut.get(cut, fallbackColor),
                alpha=0.4, zorder=2)

ax5.set_ylim(50,75)

ax5.set_title('Diamond\n', weight='bold', size=16)
ax5.set_xlabel('\nCarat', weight='bold', size=13)
ax5.set_ylabel('Depth\n', weight='bold', size=13)


#Create the legend from the labels attached to the scatter calls above
ax5.legend(prop={'size':13},
           title='Cut\n', title_fontsize=14)

ax5.grid(zorder=0)

plt.show()