<a href="https://colab.research.google.com/github/davidofitaly/practice_data/blob/main/tips/01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1. INITIAL ANALYSIS**

In [149]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

# Load seaborn's "tips" example dataset; the result is a pandas DataFrame.
tips_data = sns.load_dataset(name='tips')

In [123]:
# Preview the first rows of the dataset.
tips_data.head()


# Column reference:
# total_bill: total amount of the bill
# tip: tip amount
# sex: sex of the person -> Female / Male
# smoker: whether the person smokes -> No / Yes
# day: day of the week, stored abbreviated -> Thur / Fri / Sat / Sun
# time: meal time -> Dinner / Lunch
# size: number of people at the table


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [124]:
# Column dtypes, non-null counts and memory usage of the frame.
tips_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


In [125]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
tips_data.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [126]:
# Summary of the categorical columns: count, number of unique values,
# most frequent value and how often it occurs.
tips_data.describe(include=['category'])

Unnamed: 0,sex,smoker,day,time
count,244,244,244,244
unique,2,2,4,2
top,Male,No,Sat,Dinner
freq,157,151,87,176


In [127]:
# Number of missing values in each column.
tips_data.isna().sum()

total_bill    0
tip           0
sex           0
smoker        0
day           0
time          0
size          0
dtype: int64

**2. CHARTS + CONCLUSIONS**

A. SCATTER

1. total_bill vs. tip

In [128]:
# Tip against total bill: one panel per day (ordered Thur -> Sun),
# coloured by smoker status, with OLS trendlines.
fig = px.scatter(
    tips_data,
    x='total_bill',
    y='tip',
    color='smoker',
    facet_col='day',
    category_orders={'day': ['Thur', 'Fri', 'Sat', 'Sun']},
    trendline='ols',
    template='plotly_dark',
    width=1200,
    height=700,
)
fig.show()

# Conclusions:
# 1. tip rises as total_bill rises, suggesting that there is a relationship between these variables,
# 2. non-smokers are willing to tip more,
# 3. the regression is positive.

2. tip vs. size

In [129]:
# Party size against tip, coloured by smoker status, with OLS trendlines.
# Only 'tip', 'size' and 'smoker' are mapped here, so no ordering for 'day'
# is passed.
fig = px.scatter(data_frame=tips_data,
                 x='tip',
                 y='size',
                 width=1200,
                 height=700,
                 color='smoker',
                 trendline='ols',
                 template='plotly_dark'
                 )
fig.show()

# Conclusions:
# 1. larger groups tend to go together with larger tips,
# 2. the regression is positive.

3. sex vs. tip

In [130]:
# Tip against total bill: one row of panels per sex, coloured by meal time,
# with OLS trendlines. 'day' is not mapped in this figure, so no ordering
# for it is passed.
fig = px.scatter(data_frame=tips_data,
                 x='total_bill',
                 y='tip',
                 width=1200,
                 height=700,
                 color='time',
                 trendline='ols',
                 facet_row='sex',
                 template='plotly_dark'
                 )
fig.show()

# Conclusions:
# 1. men tip more,
# 2. the regression is positive.

B. SCATTER_MATRIX

In [142]:
# Pairwise scatter plots of total_bill, tip and size, coloured by sex.
fig = px.scatter_matrix(
    tips_data,
    dimensions=['total_bill', 'tip', 'size'],
    color='sex',
    title='Scatter Matrix',
    template='seaborn',
)
fig.show()

C. PARALLEL COORDINATES/CATEGORIES

In [133]:
# Parallel-coordinates view of the dataset, with lines coloured by total_bill.
fig = px.parallel_coordinates(tips_data, color='total_bill')
fig.show()

In [134]:
# Parallel-categories view of the dataset, with ribbons coloured by party size.
fig = px.parallel_categories(tips_data, color='size')
fig.show()

D. BUBBLE CHART

In [135]:
# Bubble chart: tip against total bill, marker size driven by party size,
# coloured by meal time; the hover title shows the day.
fig = px.scatter(
    tips_data,
    x='total_bill',
    y='tip',
    size='size',
    color='time',
    hover_name='day',
    title='Bubble Chart: Total Bill vs. Tip',
    labels={'total_bill': 'Total Bill', 'tip': 'Tip', 'size': 'Size'},
    template='plotly_dark',
)

# Thin black outline so overlapping bubbles stay distinguishable.
fig.update_traces(marker={'line': {'color': 'black', 'width': 1}})

fig.show()

E. LINE CHART

In [158]:
# Average total_bill for each party size (a Series indexed by size).
mean_total_bill_by_size = tips_data.groupby('size')['total_bill'].mean()

# Turn the Series into a two-column frame (size, mean_total_bill);
# reset_index(name=...) names the value column in the same step.
data_tips_new = mean_total_bill_by_size.reset_index(name='mean_total_bill')


fig = px.line(data_frame=data_tips_new,
              x='size',
              y='mean_total_bill',
              template='seaborn')

fig.show()




F. BAR CHART

In [159]:
# Average tip for each sex. `sex` is a categorical column, so `observed` is
# passed explicitly: observed=False keeps every category in the result and
# avoids depending on the groupby default, which pandas is changing.
mean_tip_by_sex = tips_data.groupby('sex', observed=False)['tip'].mean().reset_index()


fig = px.bar(data_frame=mean_tip_by_sex,
             x='sex',
             y='tip',
             title='Mean Tip by Sex',
             labels={'sex': 'Sex', 'tip': 'Mean Tip'},
             template='ggplot2')

fig.show()


G. PIE CHART

1. smokers vs. non smokers

In [157]:
# Count the bills paid by smokers and by non-smokers.
smoker_counts = tips_data['smoker'].value_counts()
smoker_counts_yes, smoker_counts_no = smoker_counts['Yes'], smoker_counts['No']

# Two-slice pie: smokers first, non-smokers second.
fig = go.Figure(
    data=[
        go.Pie(
            labels=['Smoker', 'Non Smokers'],
            values=[smoker_counts_yes, smoker_counts_no],
        )
    ]
)
fig.update_layout(template='ggplot2', title='Smokers vs. Non Smokers')

fig.show()

2. number of bills by party size

In [173]:
# Number of bills for each party size.
count_by_size = tips_data['size'].value_counts()

# Pie chart with one slice per party size; each slice shows its label and percentage.
fig = go.Figure(data=[go.Pie(labels=count_by_size.index,
                             values=count_by_size.values,
                             textinfo='percent+label',
                             )])

fig.update_layout(title='Number of Bills by Party Size', template='ggplot2')
fig.update_traces(textfont_color='black')

fig.show()


H. INTERACTIVE TABLE

In [187]:


# Show the whole dataset as a Plotly table: one table column per DataFrame column.
fig = go.Figure(
    go.Table(
        header={
            'values': tips_data.columns,
            'fill_color': '#86FCFF',
            'line_color': 'gray',
        },
        cells={
            'values': [column_values for _, column_values in tips_data.items()],
            'fill_color': 'lightblue',
            'line_color': 'gray',
        },
    )
)
fig.update_layout(title='Data tips_data table ')
fig.show()

I. HISTOGRAM

1. distribution of the variable size

In [200]:
# Distribution of party size, normalised to a probability density,
# with one row of panels (and one colour) per meal time.
fig = px.histogram(
    tips_data,
    x='size',
    color='time',
    facet_row='time',
    histnorm='probability density',
    title='distribution of the variable size',
    template='ggplot2',
)
fig.show()

2. distribution of the variable total_bill

In [201]:
# Distribution of total_bill, normalised to a probability density,
# with one row of panels (and one colour) per meal time.
fig = px.histogram(
    tips_data,
    x='total_bill',
    color='time',
    facet_row='time',
    histnorm='probability density',
    title='distribution of the variable total_bill',
    template='ggplot2',
)
fig.show()

J. HISTOGRAM2D

In [224]:
# 2D histogram of total bill (x) against party size (y), with nbinsx/nbinsy set to 10.
fig = go.Figure(go.Histogram2d(
    x=tips_data['total_bill'],
    y=tips_data['size'],
    nbinsx=10,
    nbinsy=10
))

# Overlay the individual observations as semi-transparent white "x" markers.
fig.add_trace(go.Scatter(
    x=tips_data['total_bill'],
    y=tips_data['size'],
    mode='markers',
    marker={
        'symbol': 'x',
        'opacity': 0.7,
        'color': 'white',
        'size': 8,
        'line': {
            'width': 1
        }
    }
))

# The y axis carries the 'size' column (party size), so it is labelled 'Size'.
fig.update_layout(title='Histogram2d',
                  xaxis_title='Total Bill',
                  yaxis_title='Size')
fig.show()