<a href="https://colab.research.google.com/github/davidofitaly/practice_data/blob/main/titanic/01_charts_conclusions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1. INITIAL ANALYSIS**

In [16]:
import seaborn as sns
import plotly.express as px

# Load seaborn's "tips" example dataset; every cell below reads from this frame.
tips_data = sns.load_dataset(name='tips')

In [13]:
tips_data.head()


# Column reference:
# total_bill: total bill amount
# tip: tip amount
# sex: sex of the person -> Female / Male
# smoker: whether the person smokes -> No / Yes
# day: day of the week, stored abbreviated -> Thur, Fri, Sat, Sun
# time: meal time -> Dinner / Lunch
# size: number of people at the table


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [14]:
# Print the column dtypes, non-null counts and memory usage of the frame.
tips_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


In [8]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
tips_data.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [12]:
# Summary of the categorical columns: count, number of unique values, most frequent value and its frequency.
tips_data.describe(include=['category'])

Unnamed: 0,sex,smoker,day,time
count,244,244,244,244
unique,2,2,4,2
top,Male,No,Sat,Dinner
freq,157,151,87,176


In [9]:
# Count missing values per column.
tips_data.isnull().sum()

total_bill    0
tip           0
sex           0
smoker        0
day           0
time          0
size          0
dtype: int64

**2. CHARTS + CONCLUSIONS**

A. SCATTER

1. total_bill vs. tip

In [73]:
# Tip against total bill: one panel per day (Thur -> Sun), points coloured by
# smoker status, with an OLS trendline.
fig = px.scatter(
    tips_data,
    x='total_bill',
    y='tip',
    color='smoker',
    facet_col='day',
    category_orders={'day': ['Thur', 'Fri', 'Sat', 'Sun']},
    trendline='ols',
    template='plotly_dark',
    width=1200,
    height=700,
)
fig.show()

# Conclusions:
# 1. As total_bill increases, tip increases, suggesting that there is a relationship between these variables.
# 2. Non-smokers are willing to tip more.
# 3. The regression is positive.

2. size vs. tip

In [47]:
# Tip as a function of party size, points coloured by smoker status.
# The OLS trendline regresses y on x, so the explanatory variable (size) is on
# the x-axis and the response (tip) on the y-axis, matching the conclusions below.
# No category_orders is passed: 'day' is not mapped to an axis, colour or facet
# in this chart, so ordering it would have no effect.
fig = px.scatter(data_frame=tips_data,
                 x='size',
                 y='tip',
                 width=1200,
                 height=700,
                 color='smoker',
                 trendline='ols',
                 template='plotly_dark'
                 )
fig.show()

# Conclusions:
# 1. Tips increase as the group size grows.
# 2. The regression is positive.

3. total_bill vs. tip, split by sex

In [48]:
# Tip against total bill, one row of panels per sex, points coloured by meal
# time, with an OLS trendline.
# No category_orders is passed: 'day' is not mapped to an axis, colour or facet
# in this chart, so ordering it would have no effect.
fig = px.scatter(data_frame=tips_data,
                 x='total_bill',
                 y='tip',
                 width=1200,
                 height=700,
                 color='time',
                 trendline='ols',
                 facet_row='sex',
                 template='plotly_dark'
                 )
fig.show()

# Conclusions:
# 1. Men tip more.
# 2. The regression is positive.

B. SCATTER_MATRIX

In [61]:
# Pairwise scatter plots of total_bill, tip and size, coloured by smoker status.
fig = px.scatter_matrix(
    tips_data,
    dimensions=['total_bill', 'tip', 'size'],
    color='smoker',
    title='Scatter Matrix',
)

fig.show()

C. PARALLEL COORDINATES/CATEGORIES

In [66]:
# Parallel-coordinates view of the frame, with lines coloured by total_bill.
fig = px.parallel_coordinates(tips_data, color='total_bill')

fig.show()

In [69]:
# Parallel-categories view of the frame, with ribbons coloured by table size.
fig = px.parallel_categories(tips_data, color='size')

fig.show()

D. BUBBLE CHART

In [83]:
# Bubble chart: tip against total bill, bubble area driven by table size,
# colour by meal time, and the day shown as the hover title.
fig = px.scatter(
    tips_data,
    x='total_bill',
    y='tip',
    size='size',
    color='time',
    hover_name='day',
    labels={'total_bill': 'Total Bill', 'tip': 'Tip', 'size': 'Size'},
    title='Bubble Chart: Total Bill vs. Tip',
    template='plotly_dark',
)

# Outline each bubble with a thin black border.
fig.update_traces(marker={'line': {'color': 'black', 'width': 1}})

fig.show()

E. LINE CHART

In [86]:
# Builds a line figure of tip against total_bill. The figure is not displayed
# in this cell, and `fig` is reassigned by the next cell.
# NOTE(review): looks like an early draft of the following cell — consider removing it.
fig = px.line(data_frame=tips_data,
        x='total_bill',
        y='tip')



In [87]:
# Create the line chart of tip against total bill.
# px.line joins points in row order and tips_data is not ordered by total_bill,
# so the frame is sorted first; otherwise the line doubles back on itself.
fig = px.line(data_frame=tips_data.sort_values('total_bill'), x='total_bill', y='tip',
              title='Line Chart: Total Bill vs Tip')
fig.update_layout(xaxis_title='Total Bill', yaxis_title='Tip')
fig.show()