In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Install a blanket "ignore" filter so no warning is shown in later cells.
# Note: this hides every warning category, not just a specific one.
import warnings
warnings.filterwarnings(action="ignore")

In [3]:
# Read the full dataset and report its (rows, columns) shape
df = pd.read_csv("diamonds.csv")
print(df.shape)

# Keep a 10% random sample; the fixed random_state makes the draw repeatable
df = df.sample(frac=0.1, random_state=10)
print(df.shape)

(53940, 11)
(5394, 11)


In [4]:
# RESETTING THE INDEX
# Sampling keeps the original row labels; drop them and renumber from 0.
df = df.reset_index(drop=True)
print(df.head())

   Unnamed: 0  carat        cut color clarity  depth  table  price     x  \
0        8019   1.01    Premium     E     SI1   61.7   56.0   4330  6.44   
1        1584   0.70  Very Good     D     VS1   60.4   58.0   3008  5.71   
2        9139   1.13  Very Good     H     SI2   59.8   59.0   4537  6.75   
3        2788   0.76      Ideal     F     VS2   61.0   55.0   3257  5.89   
4       52430   0.70    Premium     I    VVS1   61.2   59.0   2513  5.65   

      y     z  
0  6.39  3.96  
1  5.78  3.47  
2  6.82  4.06  
3  5.92  3.60  
4  5.69  3.47  


In [5]:
# SCATTER PLOT
# Price against carat weight, with one marker colour per cut value.
fig = px.scatter(
    df,
    x="carat",
    y="price",
    color="cut",
    title="Scatter Plot of Carat vs Price",
    labels={"carat": "Carat Weight", "price": "Price ($)"},
)
fig.show()

In [7]:
# LINE CHART (mean price by carat)
# Collapse the sample to one row per distinct carat value holding the mean price.
df_mean = (
    df.groupby("carat")["price"]
    .mean()
    .reset_index()
)

fig = px.line(
    df_mean,
    x="carat",
    y="price",
    title="MEAN PRICE BY CARAT",
    labels={"carat": "Carat Weight", "price": "AVERAGE PRICE"},
)
fig.show()

In [8]:
# BAR PLOT FOR AVERAGE PRICE PER CUT CATEGORY
# px.bar draws one bar segment per input row and stacks segments that share an
# x value, so plotting the raw frame shows stacked price totals, not averages.
# Aggregate to a single mean price per cut first, then plot that summary.
cut_mean_price = df.groupby("cut")["price"].mean().reset_index()
fig = px.bar(cut_mean_price, x="cut", y="price",
             color="cut",
             title="AVERAGE PRICE BY CUT",
             labels={"cut": "Cut", "price": "Average Price ($)"})
fig.show()

In [10]:
# CALCULATE STATS
# One row per clarity value with the mean and standard deviation of price.
df_stats = (
    df.groupby("clarity")["price"]
    .agg(["mean", "std"])
    .reset_index()
)

# Bar height is the mean price; the error bar length is the standard deviation.
fig = px.bar(
    df_stats,
    x="clarity",
    y="mean",
    color="clarity",
    error_y="std",
    title="AVERAGE PRICE BY CLARITY",
)
fig.show()

In [13]:
# Mean price per clarity value, kept as regular columns (no error bars here)
df_mean = df.groupby("clarity", as_index=False)["price"].mean()

fig = px.bar(
    df_mean,
    x="clarity",
    y="price",
    color="clarity",
)
fig.show()

In [16]:
# HISTOGRAM
# Distribution of price; nbins=100 caps the number of bins plotly may use.
fig = px.histogram(
    df,
    x="price",
    nbins=100,
    title='PRICE DISTRIBUTION',
)
fig.show()

In [17]:
# HISTOGRAM (split by cut)
# Same price histogram as before, with a separate colour series per cut value.
fig = px.histogram(
    df,
    x="price",
    color='cut',
    nbins=100,
    title='PRICE DISTRIBUTION',
)
fig.show()

In [18]:
# DENSITY CONTOUR PLOT
# Contours of the joint carat/price density, drawn separately per cut value.
fig = px.density_contour(
    df,
    x="carat",
    y="price",
    color='cut',
    title="DENSITY CONTOUR PLOT",
)
fig.show()

In [21]:
# BOX PLOT
# Price distribution for each cut, with one box per clarity value inside each cut.
fig = px.box(
    df,
    x="cut",
    y="price",
    color="clarity",
    title="PRICE DISTRIBUTION BY CUT",
    labels={"cut": "Cut", "price": "Price ($)", "clarity": "Clarity"},
    # which individual points to draw; options: all, suspectedoutliers, outliers
    points="suspectedoutliers",
)
fig.show()

In [22]:
# VIOLIN PLOT
# Price distribution per cut as violins (no colour split in this figure).
fig = px.violin(
    df,
    x="cut",
    y="price",
    title="PRICE DISTRIBUTION BY CUT",
    labels={"cut": "Cut", "price": "Price ($)"},
    # which individual points to draw; options: all, suspectedoutliers, outliers
    points="suspectedoutliers",
)
fig.show()

In [23]:
# FACET SCATTER PLOT BY CLARITY
# One carat-vs-price panel per clarity value (laid out as columns),
# with markers coloured by cut.
fig = px.scatter(
    df,
    x="carat",
    y="price",
    color="cut",
    facet_col="clarity",
    title="Price vs carat by cut and clarity",
)
fig.show()

In [24]:
# ANIMATED FACET SCATTER PLOT BY CLARITY
# Same faceted carat-vs-price scatter, with marker size taken from price and
# one animation frame per cut value.
# NOTE(review): color and animation_frame both use "cut", so each frame holds
# a single colour group -- confirm this is the intended effect.
fig = px.scatter(
    df,
    x="carat",
    y="price",
    color="cut",
    size="price",
    facet_col="clarity",
    animation_frame="cut",
    title="Price vs carat by cut and clarity",
)
fig.show()

In [25]:
# DENSITY HEATMAP OF CARAT AND PRICE
# 2D binned counts of the sample over the carat (x) and price (y) axes.
fig = px.density_heatmap(
    df,
    x="carat",
    y="price",
    title="DENSITY HEATMAP OF CARAT VS PRICE",
)
fig.show()

In [26]:
from plotly.subplots import make_subplots

# CREATE SUBPLOTS
# A single row with two panels; the titles are assigned left to right.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Carat vs Price", "Density Heatmap of Carat vs Price"),
)

# Build each chart with Plotly Express, then take its first trace (.data[0])
# so it can be placed into the shared subplot grid.
scatter = px.scatter(df, x="carat", y="price").data[0]
heatmap = px.density_heatmap(df, x="carat", y="price").data[0]

# Left panel: scatter of carat vs price; right panel: density heatmap
fig.add_trace(scatter, row=1, col=1)
fig.add_trace(heatmap, row=1, col=2)

# UPDATE LAYOUT
fig.update_layout(title_text="CARAT VS PRICE AND DENSITY HEATMAP OF CARAT VS PRICE")
fig.show()

In [28]:
# CREATE HEATMAP OF CORRELATION MATRIX OF ONLY NUMERIC VALUES
# Only the listed numeric columns are correlated; the text columns
# (cut, color, clarity) are left out.
corr = df[["price", "carat", "depth", "table", "x", "y", "z"]].corr()

# go.Heatmap reads z row by row: row i is drawn at y[i] and column j at x[j].
# The rows of corr are labelled by its index and its columns by its columns,
# so y takes the index labels and x takes the column labels.
fig = go.Figure(data=go.Heatmap(x=corr.columns.values,
                                y=corr.index.values,
                                z=corr.values,
                                colorscale="blues"
                                ))
# Give the figure a title so it can be read on its own
fig.update_layout(title_text="CORRELATION MATRIX OF NUMERIC FEATURES")
fig.show()