In [2]:
import pandas as pd

# Column-oriented input: each key is a column name, each list holds that column's values
student_data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Henry'],
    Age=[22, 23, 21, 22, 24, 30],
    Major=['Physics', 'History', 'Computer Science', 'Math', 'Physics', 'Chemistry'],
)

# Build the DataFrame from the dict and display it
df = pd.DataFrame(data=student_data)
df

Unnamed: 0,Name,Age,Major
0,Alice,22,Physics
1,Bob,23,History
2,Charlie,21,Computer Science
3,David,22,Math
4,Eva,24,Physics
5,Henry,30,Chemistry


In [3]:
# Step 1: List the column names in the order the values appear within each row.
col = ['Name', 'Age', 'Major']

# Step 2: Row-oriented input -- every inner list is one student record.
data = [
    ['Alice', 22, 'Physics'],
    ['Bob', 23, 'History'],
    ['Charlie', 21, 'Computer Science'],
    ['David', 22, 'Math'],
    ['Eva', 24, 'Physics'],
    ['Henry', 30, 'Chemistry'],
]

# Step 3: Build the DataFrame from the rows, labelling the columns explicitly.
df = pd.DataFrame(data=data, columns=col)
df

Unnamed: 0,Name,Age,Major
0,Alice,22,Physics
1,Bob,23,History
2,Charlie,21,Computer Science
3,David,22,Math
4,Eva,24,Physics
5,Henry,30,Chemistry


In [4]:
# Preview the top of the table: first 3 rows (head() shows 5 when n is omitted)
df.head(n=3)

Unnamed: 0,Name,Age,Major
0,Alice,22,Physics
1,Bob,23,History
2,Charlie,21,Computer Science


In [5]:
# Preview the bottom of the table: last 3 rows (tail() shows 5 when n is omitted)
df.tail(n=3)

Unnamed: 0,Name,Age,Major
3,David,22,Math
4,Eva,24,Physics
5,Henry,30,Chemistry


In [6]:
# Get the number of rows and columns; df.shape is a (rows, columns) tuple
row_count, column_count = df.shape
print(f"{row_count} rows  {column_count} columns.")

6 rows  3 columns.


In [7]:
# Print a concise summary of the DataFrame: index range, column names,
# non-null counts, dtypes and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    6 non-null      object
 1   Age     6 non-null      int64 
 2   Major   6 non-null      object
dtypes: int64(1), object(2)
memory usage: 272.0+ bytes


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
# Only 'Age' is numeric at this point, so it is the only column reported.
df.describe()

Unnamed: 0,Age
count,6.0
mean,23.666667
std,3.265986
min,21.0
25%,22.0
50%,22.5
75%,23.75
max,30.0


In [9]:
# Pull out the 'Name' column on its own; a single label yields a Series
df.loc[:, 'Name']

0      Alice
1        Bob
2    Charlie
3      David
4        Eva
5      Henry
Name: Name, dtype: object

In [10]:
# Keep every row but only the 'Name' and 'Major' columns; a list of labels yields a DataFrame
df.loc[:, ['Name', 'Major']]

Unnamed: 0,Name,Major
0,Alice,Physics
1,Bob,History
2,Charlie,Computer Science
3,David,Math
4,Eva,Physics
5,Henry,Chemistry


In [11]:
# Keep only the rows where the boolean mask (Age > 22) is True
df.loc[df['Age'] > 22]

Unnamed: 0,Name,Age,Major
1,Bob,23,History
4,Eva,24,Physics
5,Henry,30,Chemistry


In [12]:
# Physics majors older than 22: build the combined mask first, then filter with it.
# Each comparison stays in its own parentheses because & binds tighter than == and >.
physics_over_22 = (df['Major'] == 'Physics') & (df['Age'] > 22)
df[physics_over_22]

Unnamed: 0,Name,Age,Major
4,Eva,24,Physics


In [13]:
# Append a 'GPA' column; the list supplies one value per existing row, in row order
gpa_scores = [3.8, 3.5, 3.9, 3.4, 3.7, 3.8]
df['GPA'] = gpa_scores
df

Unnamed: 0,Name,Age,Major,GPA
0,Alice,22,Physics,3.8
1,Bob,23,History,3.5
2,Charlie,21,Computer Science,3.9
3,David,22,Math,3.4
4,Eva,24,Physics,3.7
5,Henry,30,Chemistry,3.8


In [14]:
# Show the table without the 'GPA' column.
# drop() returns a new DataFrame; the result is not assigned, so df itself keeps 'GPA'.
df.drop(columns='GPA')

Unnamed: 0,Name,Age,Major
0,Alice,22,Physics
1,Bob,23,History
2,Charlie,21,Computer Science
3,David,22,Math
4,Eva,24,Physics
5,Henry,30,Chemistry


In [15]:
# Get statistics for numerical columns.
# 'GPA' is included because the earlier drop() call was not assigned back to df.
df.describe()

Unnamed: 0,Age,GPA
count,6.0,6.0
mean,23.666667,3.683333
std,3.265986,0.194079
min,21.0,3.4
25%,22.0,3.55
50%,22.5,3.75
75%,23.75,3.8
max,30.0,3.9


In [16]:
# Mean of the 'Age' column, printed without rounding
age_column = df['Age']
average_age = age_column.mean()
print(f"\nThe average age of the students: {average_age}")


The average age of the students: 23.666666666666668


In [17]:
# Load the invoice export; the relative path is resolved against the working directory.
# read_csv already returns a DataFrame, so it is displayed directly instead of
# being wrapped in a second pd.DataFrame() call.
# NOTE(review): the output shows header, line-item and totals rows mixed in one
# mostly-empty table -- presumably it needs reshaping before analysis; confirm.
invoice = pd.read_csv('invoice_1.csv')
invoice

Unnamed: 0,Project Name,Client,Invoice Date,Hourly Rate,Task Description,Start Time,End Time,Duration (Hours),Cost,Total Hours,Total Cost
0,Website Redesign,Innovate Corp,2025-08-07,$75.00,,,,,,,
1,,,,,Developing wireframes for the main dashboard,2025-08-06 09:00,2025-08-06 13:00,4.0,300.0,,
2,,,,,Initial client meeting and requirement gathering,2025-08-07 10:00,2025-08-07 11:30,1.5,112.5,,
3,,,,,Designing the main landing page,2025-08-07 19:15,2025-08-07 19:16,0.02,1.21,,
4,,,,,,,,,,5.52,$413.72
