In [2]:
import pandas as pd
import numpy as np

## Series

The first main pandas data type we will learn about is the Series. With pandas imported above, let's explore the Series object.

A Series is very similar to a NumPy array (in fact, it is built on top of the NumPy array object). What differentiates a Series from a NumPy array is that a Series can have axis labels, meaning it can be indexed by a label instead of just a numeric position. It also doesn't need to hold numeric data; it can hold any arbitrary Python object.

Let's explore this concept through some examples:

In [4]:
# help(pd.Series)

In [5]:
# Row labels and the matching values used by the Series/DataFrame demos below.
myindex = [
    "amir",
    "hosein",
    "ali",
]
mydata = [
    12,
    18,
    20,
]

In [6]:
# With no index given, the Series gets the default integer labels (0, 1, 2).
my_ser = pd.Series(mydata)

In [7]:
my_ser

0    12
1    18
2    20
dtype: int64

In [8]:
# Same values, now labelled with the names in myindex instead of integer positions.
my_ser = pd.Series(mydata, index=myindex)

In [9]:
my_ser

amir      12
hosein    18
ali       20
dtype: int64

# DataFrames

Throughout the course, most of our data exploration will be done with DataFrames. DataFrames are an extremely powerful tool and a natural extension of the Pandas Series. By definition, a DataFrame is simply:

**A Pandas DataFrame consists of multiple Pandas Series that share index values.**

In [11]:
# help(pd.DataFrame)

In [12]:
myindex

['amir', 'hosein', 'ali']

In [13]:
mydata

[12, 18, 20]

In [14]:
# A plain list becomes a one-column DataFrame with default row and column labels.
df = pd.DataFrame(mydata)

In [15]:
df

Unnamed: 0,0
0,12
1,18
2,20


In [16]:
# index= labels the rows; the single column keeps its default label 0.
df = pd.DataFrame(mydata, index=myindex)

In [17]:
df

Unnamed: 0,0
amir,12
hosein,18
ali,20



# Reading a .csv file for a DataFrame

----

## NOTE: We will go over all kinds of data inputs and outputs (.html, .csv, .xlsx, etc.) later on in the course! For now we just need to read in a simple .csv file.

----

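Optional: these packages are only needed later in the course for reading Excel files. Run the commands in a terminal, not in a notebook cell:

```bash
conda install xlrd
conda install openpyxl
```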

In [21]:
# Load the tips data from a CSV file; the relative path is resolved against
# the current working directory.
# Note: this rebinds `df`, replacing the small demo DataFrame built earlier.
df = pd.read_csv('tips.csv')

In [20]:
# Alternative (disabled): load the same file through an explicit path.
# NOTE(review): the absolute path below is specific to one machine; prefer the
# relative 'tips.csv' form so the notebook runs elsewhere.
# file_path = "C:/Users/Amir/Desktop/python/tips.csv"
# df = pd.read_csv(file_path)

In [22]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


In [23]:
df.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size',
       'price_per_person', 'Payer Name', 'CC Number', 'Payment ID'],
      dtype='object')

In [24]:
len(df.columns)

11

In [25]:
df.head(5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [26]:
df.tail(4)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
240,27.18,2.0,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.0,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17
243,18.78,3.0,Female,No,Thur,Dinner,2,9.39,Michelle Hardin,3511451626698139,Thur672


In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   total_bill        244 non-null    float64
 1   tip               244 non-null    float64
 2   sex               244 non-null    object 
 3   smoker            244 non-null    object 
 4   day               244 non-null    object 
 5   time              244 non-null    object 
 6   size              244 non-null    int64  
 7   price_per_person  244 non-null    float64
 8   Payer Name        244 non-null    object 
 9   CC Number         244 non-null    int64  
 10  Payment ID        244 non-null    object 
dtypes: float64(3), int64(2), object(6)
memory usage: 21.1+ KB


In [28]:
df.describe()

Unnamed: 0,total_bill,tip,size,price_per_person,CC Number
count,244.0,244.0,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672,7.888197,2563496000000000.0
std,8.902412,1.383638,0.9511,2.914234,2369340000000000.0
min,3.07,1.0,1.0,2.88,60406790000.0
25%,13.3475,2.0,2.0,5.8,30407310000000.0
50%,17.795,2.9,2.0,7.255,3525318000000000.0
75%,24.1275,3.5625,3.0,9.39,4553675000000000.0
max,50.81,10.0,6.0,20.27,6596454000000000.0


In [29]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_bill,244.0,19.78594,8.902412,3.07,13.3475,17.795,24.1275,50.81
tip,244.0,2.998279,1.383638,1.0,2.0,2.9,3.5625,10.0
size,244.0,2.569672,0.9510998,1.0,2.0,2.0,3.0,6.0
price_per_person,244.0,7.888197,2.914234,2.88,5.8,7.255,9.39,20.27
CC Number,244.0,2563496000000000.0,2369340000000000.0,60406790000.0,30407310000000.0,3525318000000000.0,4553675000000000.0,6596454000000000.0


In [31]:
df.head(3)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458


In [32]:
df['total_bill']

0      16.99
1      10.34
2      21.01
3      23.68
4      24.59
       ...  
239    29.03
240    27.18
241    22.67
242    17.82
243    18.78
Name: total_bill, Length: 244, dtype: float64

In [33]:
df['smoker']

0       No
1       No
2       No
3       No
4       No
      ... 
239     No
240    Yes
241    Yes
242     No
243     No
Name: smoker, Length: 244, dtype: object

In [34]:
# Asking for a column label that does not exist raises KeyError.
# The error is caught and printed so the demonstration stays visible while
# the notebook can still be run top to bottom (Restart & Run All).
try:
    df['name']
except KeyError as err:
    print("KeyError:", err)

KeyError: 'name'

In [36]:
# Indexing with a list of labels (note the double brackets) returns a
# DataFrame holding just those columns, rather than a single Series.
df2 = df[['total_bill', 'tip']]

In [37]:
df2

Unnamed: 0,total_bill,tip
0,16.99,1.01
1,10.34,1.66
2,21.01,3.50
3,23.68,3.31
4,24.59,3.61
...,...,...
239,29.03,5.92
240,27.18,2.00
241,22.67,2.00
242,17.82,1.75


In [38]:
df.head(2)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608


In [40]:
# Tip as a percentage of the bill: scale the tip by 100 first, then divide
# element-wise by the bill (same operation order as 100 * tip / total_bill).
df['tip_percentage'] = df['tip'].mul(100).div(df['total_bill'])

In [41]:
df['tip_percentage']

0       5.944673
1      16.054159
2      16.658734
3      13.978041
4      14.680765
         ...    
239    20.392697
240     7.358352
241     8.822232
242     9.820426
243    15.974441
Name: tip_percentage, Length: 244, dtype: float64

In [42]:
df.head(2)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,tip_percentage
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,5.944673
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,16.054159


In [43]:
# Keep two decimal places in the derived percentage column.
df['tip_percentage'] = df['tip_percentage'].round(2)

In [44]:
df['tip_percentage']

0       5.94
1      16.05
2      16.66
3      13.98
4      14.68
       ...  
239    20.39
240     7.36
241     8.82
242     9.82
243    15.97
Name: tip_percentage, Length: 244, dtype: float64

In [45]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,tip_percentage
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,5.94
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,16.05
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,16.66
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,13.98
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,14.68


In [47]:
# df = df.drop('tip_percentage', axis=1)

In [48]:
# Remove the derived column. drop() returns a new frame, so rebind `df` to it
# instead of mutating the existing object with inplace=True; columns= names
# the axis explicitly (equivalent to axis=1).
df = df.drop(columns="tip_percentage")

In [49]:
df.head(3)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458


In [50]:
df.index

RangeIndex(start=0, stop=244, step=1)

In [55]:
# Use the "Payment ID" column as the row index. set_index() returns a new
# frame, so rebind `df` rather than mutating it with inplace=True.
# Running this cell twice in a row raises KeyError: after the first run
# "Payment ID" is the index and no longer a column.
df = df.set_index("Payment ID")

In [56]:
df.head()

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221


In [57]:
# Restore the default integer index; "Payment ID" moves back to being a
# regular (first) column. Rebind `df` rather than using inplace=True.
df = df.reset_index()

In [58]:
df.head()

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
0,Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
1,Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
2,Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322
3,Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
4,Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221


In [59]:
# Load gorgan.csv: one row per YEAR with columns JAN-DEC plus D-J-F, M-A-M,
# J-J-A, S-O-N and metANN.
# NOTE(review): the 999.9 entries that appear in every data column look like
# missing-data placeholders rather than real readings, and they inflate the
# mean/std/max reported by describe(). Confirm against the data source; if so,
# loading with na_values=[999.9] would turn them into NaN.
gorgan_temp = pd.read_csv('gorgan.csv')

In [60]:
gorgan_temp.head()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
0,1952,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,7.27,999.9,999.9,999.9,999.9,999.9
1,1953,7.83,9.98,7.27,10.67,999.9,999.9,25.28,26.58,22.22,16.03,999.9,999.9,8.36,11.62,25.2,16.32,15.37
2,1954,999.9,999.9,999.9,10.73,20.58,24.88,27.83,27.38,24.63,19.72,13.18,8.38,999.9,13.15,26.7,19.18,16.6
3,1955,7.23,11.63,9.08,13.63,19.38,23.47,27.38,27.13,23.22,18.42,13.42,8.88,9.08,14.03,25.99,18.35,16.86
4,1956,7.58,6.63,6.83,14.08,16.88,22.43,24.33,26.03,21.03,16.58,12.78,7.27,7.7,12.6,24.26,16.8,15.34


In [61]:
gorgan_temp.describe()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,D-J-F,M-A-M,J-J-A,S-O-N,metANN
count,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0,72.0
mean,1987.5,144.443472,145.082222,161.092917,151.407917,183.432917,173.535833,175.426806,148.339722,145.615278,140.869028,135.8825,159.83625,131.4275,151.615417,161.245278,140.78625,166.78375
std,20.92845,345.975977,345.717267,358.704038,343.157429,367.699647,353.381865,352.572665,324.120861,325.1578,326.965528,328.863793,359.239521,330.557581,343.072031,339.177621,326.995048,356.266758
min,1952.0,1.27,1.52,5.44,10.67,16.87,21.21,23.87,24.02,20.32,14.67,7.54,4.52,3.85,11.62,23.89,15.7,14.21
25%,1969.75,5.8575,6.235,8.8575,13.4875,19.44,23.61,25.8475,25.8325,22.46,17.08,11.575,7.5575,6.8275,14.235,25.175,17.375,16.0425
50%,1987.5,7.175,8.005,10.615,15.0,20.54,24.845,26.965,26.935,23.855,18.34,13.19,8.755,7.835,14.98,26.19,18.325,16.745
75%,2005.25,8.5825,9.145,11.9825,16.37,21.9375,26.345,28.4725,28.3175,25.0975,19.825,14.23,9.9825,8.6925,16.115,27.6075,19.3625,17.7025
max,2023.0,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9,999.9


In [62]:
gorgan_temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 18 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   YEAR    72 non-null     int64  
 1   JAN     72 non-null     float64
 2   FEB     72 non-null     float64
 3   MAR     72 non-null     float64
 4   APR     72 non-null     float64
 5   MAY     72 non-null     float64
 6   JUN     72 non-null     float64
 7   JUL     72 non-null     float64
 8   AUG     72 non-null     float64
 9   SEP     72 non-null     float64
 10  OCT     72 non-null     float64
 11  NOV     72 non-null     float64
 12  DEC     72 non-null     float64
 13  D-J-F   72 non-null     float64
 14  M-A-M   72 non-null     float64
 15  J-J-A   72 non-null     float64
 16  S-O-N   72 non-null     float64
 17  metANN  72 non-null     float64
dtypes: float64(17), int64(1)
memory usage: 10.2 KB


In [64]:
df.head(1)

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
0,Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410


In [66]:
df.iloc[122]

Payment ID                 Thur3579
total_bill                    14.26
tip                             2.5
sex                            Male
smoker                           No
day                            Thur
time                          Lunch
size                              2
price_per_person               7.13
Payer Name             Perry Garcia
CC Number           180034646320219
Name: 122, dtype: object

In [67]:
df.iloc[:4]

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
0,Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
1,Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
2,Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322
3,Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994


In [68]:
# Index the rows by "Payment ID" so they can be looked up by label with .loc.
# set_index() returns a new frame, so rebind `df` rather than mutating it
# with inplace=True.
df = df.set_index("Payment ID")

In [69]:
df.head()

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221


In [70]:
df.loc['Sun2959']

total_bill                       16.99
tip                               1.01
sex                             Female
smoker                              No
day                                Sun
time                            Dinner
size                                 2
price_per_person                  8.49
Payer Name          Christy Cunningham
CC Number             3560325168603410
Name: Sun2959, dtype: object

# Conditional Filtering

In [71]:
# Go back to the default integer index for the filtering examples;
# "Payment ID" becomes a regular column again. Rebind `df` rather than
# using inplace=True.
df = df.reset_index()

In [72]:
df.head(2)

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
0,Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
1,Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230


In [73]:
# Boolean mask: True for every row whose total bill is greater than 30.
bool_series = df['total_bill'].gt(30)

In [76]:
df[bool_series]

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
11,Sun6686,35.26,5.0,Female,No,Sun,Dinner,4,8.82,Diane Macias,4577817359320969
23,Sat239,39.42,7.58,Male,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808
39,Sat6373,31.27,5.0,Male,No,Sat,Dinner,3,10.42,Mr. Brandon Berry,6011525851069856
44,Sun2274,30.4,5.6,Male,No,Sun,Dinner,4,7.6,Todd Cooper,503846761263
47,Sun9677,32.4,6.0,Male,No,Sun,Dinner,4,8.1,James Barnes,3552002592874186
52,Sun6165,34.81,5.2,Female,No,Sun,Dinner,4,8.7,Emily Daniel,4291280793094374
56,Sat8903,38.01,3.0,Male,Yes,Sat,Dinner,4,9.5,James Christensen DDS,349793629453226
59,Sat8139,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595
83,Thur8801,32.68,5.0,Male,Yes,Thur,Lunch,2,16.34,Daniel Murphy,5356177501009133
85,Thur7972,34.83,5.17,Female,No,Thur,Lunch,4,8.71,Shawna Cook,6011787464177340


In [77]:
df[df['smoker'] == 'No']

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
0,Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
1,Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
2,Sun4458,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322
3,Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
4,Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221
...,...,...,...,...,...,...,...,...,...,...,...
235,Sat4615,10.07,1.25,Male,No,Sat,Dinner,2,5.04,Sean Gonzalez,3534021246117605
238,Sat9777,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,676184013727
239,Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842
242,Sat17,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950


In [80]:
# Combine two conditions element-wise with & ("and"). Each comparison needs
# its own parentheses because & binds more tightly than == and >.
df[(df['smoker'] == 'No') & (df['total_bill'] > 30)]

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
11,Sun6686,35.26,5.0,Female,No,Sun,Dinner,4,8.82,Diane Macias,4577817359320969
23,Sat239,39.42,7.58,Male,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808
39,Sat6373,31.27,5.0,Male,No,Sat,Dinner,3,10.42,Mr. Brandon Berry,6011525851069856
44,Sun2274,30.4,5.6,Male,No,Sun,Dinner,4,7.6,Todd Cooper,503846761263
47,Sun9677,32.4,6.0,Male,No,Sun,Dinner,4,8.1,James Barnes,3552002592874186
52,Sun6165,34.81,5.2,Female,No,Sun,Dinner,4,8.7,Emily Daniel,4291280793094374
59,Sat8139,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595
85,Thur7972,34.83,5.17,Female,No,Thur,Lunch,4,8.71,Shawna Cook,6011787464177340
112,Sun591,38.07,4.0,Male,No,Sun,Dinner,3,12.69,Jeff Lopez,3572865915176463
141,Thur1025,34.3,6.7,Male,No,Thur,Lunch,6,5.72,Steven Carlson,3526515703718508


In [81]:
df[(df['smoker'] == 'No') & (df['total_bill'] > 30)].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15 entries, 11 to 238
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Payment ID        15 non-null     object 
 1   total_bill        15 non-null     float64
 2   tip               15 non-null     float64
 3   sex               15 non-null     object 
 4   smoker            15 non-null     object 
 5   day               15 non-null     object 
 6   time              15 non-null     object 
 7   size              15 non-null     int64  
 8   price_per_person  15 non-null     float64
 9   Payer Name        15 non-null     object 
 10  CC Number         15 non-null     int64  
dtypes: float64(3), int64(2), object(6)
memory usage: 1.4+ KB


In [84]:
# ~ inverts a boolean mask: bills over 30 where sex is NOT 'Male'.
df[(df['total_bill'] > 30) & ~(df['sex']=='Male')]

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
11,Sun6686,35.26,5.0,Female,No,Sun,Dinner,4,8.82,Diane Macias,4577817359320969
52,Sun6165,34.81,5.2,Female,No,Sun,Dinner,4,8.7,Emily Daniel,4291280793094374
85,Thur7972,34.83,5.17,Female,No,Thur,Lunch,4,8.71,Shawna Cook,6011787464177340
102,Sat6240,44.3,2.5,Female,Yes,Sat,Dinner,3,14.77,Heather Cohen,379771118886604
197,Thur9313,43.11,5.0,Female,Yes,Thur,Lunch,4,10.78,Brooke Soto,5544902205760175
219,Sat8863,30.14,3.09,Female,Yes,Sat,Dinner,4,7.54,Shelby House,502097403252
238,Sat9777,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,676184013727


In [85]:
# The same condition written with != instead of negating an == mask.
df[(df['total_bill'] > 30) & (df['sex'] != 'Male')]

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
11,Sun6686,35.26,5.0,Female,No,Sun,Dinner,4,8.82,Diane Macias,4577817359320969
52,Sun6165,34.81,5.2,Female,No,Sun,Dinner,4,8.7,Emily Daniel,4291280793094374
85,Thur7972,34.83,5.17,Female,No,Thur,Lunch,4,8.71,Shawna Cook,6011787464177340
102,Sat6240,44.3,2.5,Female,Yes,Sat,Dinner,3,14.77,Heather Cohen,379771118886604
197,Thur9313,43.11,5.0,Female,Yes,Thur,Lunch,4,10.78,Brooke Soto,5544902205760175
219,Sat8863,30.14,3.09,Female,Yes,Sat,Dinner,4,7.54,Shelby House,502097403252
238,Sat9777,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,676184013727


In [86]:
# | is the element-wise "or": keep rows whose day is either 'Sun' or 'Sat'.
df[(df['day'] == 'Sun') | (df['day'] == 'Sat')]

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
0,Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
1,Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
2,Sun4458,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322
3,Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
4,Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221
...,...,...,...,...,...,...,...,...,...,...,...
238,Sat9777,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,676184013727
239,Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842
240,Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404
241,Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196


In [90]:
# isin() tests membership in a list of values, a shorter alternative to
# chaining several == comparisons with |.
# NOTE(review): `w` is an uninformative name; something like `weekend_tips`
# would read better (the following cell refers to it as well).
w = df[df['day'].isin(['Sat', 'Sun'])]

In [92]:
w.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 163 entries, 0 to 242
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Payment ID        163 non-null    object 
 1   total_bill        163 non-null    float64
 2   tip               163 non-null    float64
 3   sex               163 non-null    object 
 4   smoker            163 non-null    object 
 5   day               163 non-null    object 
 6   time              163 non-null    object 
 7   size              163 non-null    int64  
 8   price_per_person  163 non-null    float64
 9   Payer Name        163 non-null    object 
 10  CC Number         163 non-null    int64  
dtypes: float64(3), int64(2), object(6)
memory usage: 15.3+ KB
