In [1]:
import numpy as np
import pandas as pd

# ------------------------------------------------------------
# 1. Creating a Random Score Matrix
# ------------------------------------------------------------

np.random.seed(101)                    # Set seed for reproducibility
scores = np.random.randint(0, 101, (4, 3))   # 4 rows, 3 columns (scores 0–100)
print(scores)

[[95 11 81]
 [70 63 87]
 [75  9 77]
 [40  4 63]]


In [2]:
# ------------------------------------------------------------
# 2. Creating DataFrames in Multiple Ways
# ------------------------------------------------------------

players = ['Virat', 'Rohit', 'MSDhoni', 'KL.Rahul']
matches = ['Match1', 'Match2', 'Match3']

# DataFrame with default row/column labels
df = pd.DataFrame(data=scores)
print(df)

# DataFrame with custom row labels
df = pd.DataFrame(data=scores, index=players)
print(df)

# DataFrame with custom rows + custom column labels
df = pd.DataFrame(data=scores, index=players, columns=matches)
print(df)


    0   1   2
0  95  11  81
1  70  63  87
2  75   9  77
3  40   4  63
           0   1   2
Virat     95  11  81
Rohit     70  63  87
MSDhoni   75   9  77
KL.Rahul  40   4  63
          Match1  Match2  Match3
Virat         95      11      81
Rohit         70      63      87
MSDhoni       75       9      77
KL.Rahul      40       4      63


In [3]:
# ------------------------------------------------------------
# 3. Basic DataFrame Information
# ------------------------------------------------------------

print(df.info())       # Info about DataFrame structure

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, Virat to KL.Rahul
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Match1  4 non-null      int64
 1   Match2  4 non-null      int64
 2   Match3  4 non-null      int64
dtypes: int64(3)
memory usage: 128.0+ bytes
None


In [4]:
# ------------------------------------------------------------
# 4. Loading and Exploring a CSV File
# ------------------------------------------------------------

df = pd.read_csv('tips.csv')   # Load dataset
print(df.columns)              # Column names
print(df.index)                # Row index range
print(df.head(3))              # First 3 rows
print(df.tail(2))              # Last 2 rows
print(df.info())               # Column info, data types, nulls
print(len(df))                 # Number of rows
print(df.describe())           # Statistical summary
print(df.describe().transpose())  # Summary transposed for readability

Index(['total_bill', 'tip', 'gender', 'smoker', 'day', 'time', 'size',
       'price_per_person', 'Payer Name', 'CC Number', 'Payment ID'],
      dtype='object')
RangeIndex(start=0, stop=244, step=1)
   total_bill   tip  gender smoker  day    time  size  price_per_person  \
0       16.99  1.01  Female     No  Sun  Dinner     2              8.49   
1       10.34  1.66    Male     No  Sun  Dinner     3              3.45   
2       21.01  3.50    Male     No  Sun  Dinner     3              7.00   

           Payer Name     CC Number Payment ID  
0  Christy Cunningham  3.560330e+15    Sun2959  
1      Douglas Tucker  4.478070e+15    Sun4608  
2      Travis Walters  6.011810e+15    Sun4458  
     total_bill   tip  gender smoker   day    time  size  price_per_person  \
242       17.82  1.75    Male     No   Sat  Dinner     2              8.91   
243       18.78  3.00  Female     No  Thur  Dinner     2              9.39   

          Payer Name     CC Number Payment ID  
242     Dennis Dixon

In [5]:
# ------------------------------------------------------------
# 5. Formatting Output Display
# ------------------------------------------------------------

pd.set_option('display.float_format', '{:.0f}'.format)   # No decimals
print(df)

     total_bill  tip  gender smoker   day    time  size  price_per_person  \
0            17    1  Female     No   Sun  Dinner     2                 8   
1            10    2    Male     No   Sun  Dinner     3                 3   
2            21    4    Male     No   Sun  Dinner     3                 7   
3            24    3    Male     No   Sun  Dinner     2                12   
4            25    4  Female     No   Sun  Dinner     4                 6   
..          ...  ...     ...    ...   ...     ...   ...               ...   
239          29    6    Male     No   Sat  Dinner     3                10   
240          27    2  Female    Yes   Sat  Dinner     2                14   
241          23    2    Male    Yes   Sat  Dinner     2                11   
242          18    2    Male     No   Sat  Dinner     2                 9   
243          19    3  Female     No  Thur  Dinner     2                 9   

             Payer Name        CC Number Payment ID  
0    Christy Cunningh

In [6]:
# ------------------------------------------------------------
# 6. Creating New Columns
# ------------------------------------------------------------

df['price_per_person'] = np.round(df['price_per_person'], 2)   # Round to 2 decimals
print(df.head())

   total_bill  tip  gender smoker  day    time  size  price_per_person  \
0          17    1  Female     No  Sun  Dinner     2                 8   
1          10    2    Male     No  Sun  Dinner     3                 3   
2          21    4    Male     No  Sun  Dinner     3                 7   
3          24    3    Male     No  Sun  Dinner     2                12   
4          25    4  Female     No  Sun  Dinner     4                 6   

           Payer Name        CC Number Payment ID  
0  Christy Cunningham 3560330000000000    Sun2959  
1      Douglas Tucker 4478070000000000    Sun4608  
2      Travis Walters 6011810000000000    Sun4458  
3    Nathaniel Harris 4676140000000000    Sun5260  
4        Tonya Carter 4832730000000000    Sun2251  


In [8]:
# ------------------------------------------------------------
# 7. Dropping Columns
# ------------------------------------------------------------

df = df.drop('CC Number', axis=1)   # axis=1 → drop column
print(df.head())

# (Alternative)
# df.drop('tip_percentage', axis=1, inplace=True)

   total_bill  tip  gender smoker  day    time  size  price_per_person  \
0          17    1  Female     No  Sun  Dinner     2                 8   
1          10    2    Male     No  Sun  Dinner     3                 3   
2          21    4    Male     No  Sun  Dinner     3                 7   
3          24    3    Male     No  Sun  Dinner     2                12   
4          25    4  Female     No  Sun  Dinner     4                 6   

           Payer Name Payment ID  
0  Christy Cunningham    Sun2959  
1      Douglas Tucker    Sun4608  
2      Travis Walters    Sun4458  
3    Nathaniel Harris    Sun5260  
4        Tonya Carter    Sun2251  


In [9]:
# ------------------------------------------------------------
# 8. Setting and Resetting Index
# ------------------------------------------------------------

df = df.set_index('Payment ID')
print(df.head())

df = df.reset_index()   # Bring index back as a normal column
print(df.head())

            total_bill  tip  gender smoker  day    time  size  \
Payment ID                                                      
Sun2959             17    1  Female     No  Sun  Dinner     2   
Sun4608             10    2    Male     No  Sun  Dinner     3   
Sun4458             21    4    Male     No  Sun  Dinner     3   
Sun5260             24    3    Male     No  Sun  Dinner     2   
Sun2251             25    4  Female     No  Sun  Dinner     4   

            price_per_person          Payer Name  
Payment ID                                        
Sun2959                    8  Christy Cunningham  
Sun4608                    3      Douglas Tucker  
Sun4458                    7      Travis Walters  
Sun5260                   12    Nathaniel Harris  
Sun2251                    6        Tonya Carter  
  Payment ID  total_bill  tip  gender smoker  day    time  size  \
0    Sun2959          17    1  Female     No  Sun  Dinner     2   
1    Sun4608          10    2    Male     No  Sun  Di

In [10]:
# ------------------------------------------------------------
# 9. Boolean Filtering (Row Selection)
# ------------------------------------------------------------

print(df['total_bill'] < 30)              # Boolean Series (True/False)
print(df[df['total_bill'] < 30])          # Filter rows where bill < 30

print(df[df['gender'] == 'Male'])         # Male customers

# AND condition
print(df[(df['total_bill'] < 30) & (df['gender'] == 'Male')])

# NOT (~) operator
print(df[(df['total_bill'] < 30) & ~(df['gender'] == 'Male')])

# OR condition
print(df[(df['total_bill'] < 30) | (df['tip'] > 5)])

0      True
1      True
2      True
3      True
4      True
       ... 
239    True
240    True
241    True
242    True
243    True
Name: total_bill, Length: 244, dtype: bool
    Payment ID  total_bill  tip  gender smoker   day    time  size  \
0      Sun2959          17    1  Female     No   Sun  Dinner     2   
1      Sun4608          10    2    Male     No   Sun  Dinner     3   
2      Sun4458          21    4    Male     No   Sun  Dinner     3   
3      Sun5260          24    3    Male     No   Sun  Dinner     2   
4      Sun2251          25    4  Female     No   Sun  Dinner     4   
..         ...         ...  ...     ...    ...   ...     ...   ...   
239    Sat2657          29    6    Male     No   Sat  Dinner     3   
240    Sat1766          27    2  Female    Yes   Sat  Dinner     2   
241    Sat3880          23    2    Male    Yes   Sat  Dinner     2   
242      Sat17          18    2    Male     No   Sat  Dinner     2   
243    Thur672          19    3  Female     No  Thur  D

In [11]:
# ------------------------------------------------------------
# 10. Using isin() for Multiple Values
# ------------------------------------------------------------

options = ['Sat', 'Sun']
print(df[df['day'].isin(options)])    # Filter rows where day is Sat or Sun

    Payment ID  total_bill  tip  gender smoker  day    time  size  \
0      Sun2959          17    1  Female     No  Sun  Dinner     2   
1      Sun4608          10    2    Male     No  Sun  Dinner     3   
2      Sun4458          21    4    Male     No  Sun  Dinner     3   
3      Sun5260          24    3    Male     No  Sun  Dinner     2   
4      Sun2251          25    4  Female     No  Sun  Dinner     4   
..         ...         ...  ...     ...    ...  ...     ...   ...   
238    Sat9777          36    5  Female     No  Sat  Dinner     3   
239    Sat2657          29    6    Male     No  Sat  Dinner     3   
240    Sat1766          27    2  Female    Yes  Sat  Dinner     2   
241    Sat3880          23    2    Male    Yes  Sat  Dinner     2   
242      Sat17          18    2    Male     No  Sat  Dinner     2   

     price_per_person          Payer Name  
0                   8  Christy Cunningham  
1                   3      Douglas Tucker  
2                   7      Travis Walte

In [13]:
# ------------------------------------------------------------
# 11. Multiple Ways to Filter Rows (Equivalent)
# ------------------------------------------------------------

filtered1 = df[(df['gender'] == 'Female') & (df['tip'] >= 4)]
filtered2 = df.query("gender == 'Female' and tip >= 4")
filtered3 = df.loc[(df['gender'] == 'Female') & (df['tip'] >= 4)]
filtered4 = df.where((df['gender'] == 'Female') & (df['tip'] >= 4))

print(filtered1)

    Payment ID  total_bill  tip  gender smoker   day    time  size  \
11     Sun6686          35    5  Female     No   Sun  Dinner     4   
52     Sun6165          35    5  Female     No   Sun  Dinner     4   
73     Sat6065          25    5  Female    Yes   Sat  Dinner     2   
85    Thur7972          35    5  Female     No  Thur   Lunch     4   
93     Fri6963          16    4  Female    Yes   Fri  Dinner     2   
104    Sat3194          21    4  Female     No   Sat  Dinner     2   
109    Sat2614          14    4  Female    Yes   Sat  Dinner     2   
114    Sun6492          26    4  Female     No   Sun  Dinner     3   
125   Thur3948          30    4  Female     No  Thur   Lunch     6   
143   Thur6179          27    5  Female     No  Thur   Lunch     6   
155    Sun9176          30    5  Female     No   Sun  Dinner     5   
178    Sun4598          10    4  Female    Yes   Sun  Dinner     2   
191    Thur967          20    4  Female    Yes  Thur   Lunch     2   
197   Thur9313      

In [14]:
print(filtered2)

    Payment ID  total_bill  tip  gender smoker   day    time  size  \
11     Sun6686          35    5  Female     No   Sun  Dinner     4   
52     Sun6165          35    5  Female     No   Sun  Dinner     4   
73     Sat6065          25    5  Female    Yes   Sat  Dinner     2   
85    Thur7972          35    5  Female     No  Thur   Lunch     4   
93     Fri6963          16    4  Female    Yes   Fri  Dinner     2   
104    Sat3194          21    4  Female     No   Sat  Dinner     2   
109    Sat2614          14    4  Female    Yes   Sat  Dinner     2   
114    Sun6492          26    4  Female     No   Sun  Dinner     3   
125   Thur3948          30    4  Female     No  Thur   Lunch     6   
143   Thur6179          27    5  Female     No  Thur   Lunch     6   
155    Sun9176          30    5  Female     No   Sun  Dinner     5   
178    Sun4598          10    4  Female    Yes   Sun  Dinner     2   
191    Thur967          20    4  Female    Yes  Thur   Lunch     2   
197   Thur9313      

In [15]:
print(filtered3)

    Payment ID  total_bill  tip  gender smoker   day    time  size  \
11     Sun6686          35    5  Female     No   Sun  Dinner     4   
52     Sun6165          35    5  Female     No   Sun  Dinner     4   
73     Sat6065          25    5  Female    Yes   Sat  Dinner     2   
85    Thur7972          35    5  Female     No  Thur   Lunch     4   
93     Fri6963          16    4  Female    Yes   Fri  Dinner     2   
104    Sat3194          21    4  Female     No   Sat  Dinner     2   
109    Sat2614          14    4  Female    Yes   Sat  Dinner     2   
114    Sun6492          26    4  Female     No   Sun  Dinner     3   
125   Thur3948          30    4  Female     No  Thur   Lunch     6   
143   Thur6179          27    5  Female     No  Thur   Lunch     6   
155    Sun9176          30    5  Female     No   Sun  Dinner     5   
178    Sun4598          10    4  Female    Yes   Sun  Dinner     2   
191    Thur967          20    4  Female    Yes  Thur   Lunch     2   
197   Thur9313      

In [16]:
print(filtered4)

    Payment ID  total_bill  tip gender smoker  day time  size  \
0          NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   
1          NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   
2          NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   
3          NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   
4          NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   
..         ...         ...  ...    ...    ...  ...  ...   ...   
239        NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   
240        NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   
241        NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   
242        NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   
243        NaN         NaN  NaN    NaN    NaN  NaN  NaN   NaN   

     price_per_person Payer Name  
0                 NaN        NaN  
1                 NaN        NaN  
2                 NaN        NaN  
3                 NaN        NaN  
4                 NaN        NaN  
..       

In [17]:
# ------------------------------------------------------------
# 12. Sorting Data
# ------------------------------------------------------------

# Sort by 'total_bill' ascending
sorted_by_bill = df.sort_values(by='total_bill').head(5)
print(sorted_by_bill)

# Sort by day (A–Z) and then total_bill (descending)
sorted_by_bill = df.sort_values(by=['day', 'total_bill'], 
                                ascending=[True, False])
print(sorted_by_bill.head(10))

    Payment ID  total_bill  tip  gender smoker   day    time  size  \
67     Sat3455           3    1  Female    Yes   Sat  Dinner     1   
92     Fri3780           6    1  Female    Yes   Fri  Dinner     2   
111    Sat4801           7    1  Female     No   Sat  Dinner     1   
172    Sun9209           7    5    Male    Yes   Sun  Dinner     2   
149   Thur6321           8    2    Male     No  Thur   Lunch     2   

     price_per_person      Payer Name  
67                  3   Tiffany Brock  
92                  3    Leah Ramirez  
111                 7     Terri Jones  
172                 4     Larry White  
149                 4  Daniel Robbins  
    Payment ID  total_bill  tip  gender smoker  day    time  size  \
95     Fri9628          40    5    Male    Yes  Fri  Dinner     4   
90     Fri4175          29    3    Male    Yes  Fri  Dinner     2   
96     Fri3159          27    4    Male    Yes  Fri  Dinner     2   
94     Fri2318          23    3  Female     No  Fri  Dinner    