In [60]:
import pandas as pd
import numpy as np

### Creating DataFrames

In [61]:
# Build a DataFrame from a column-oriented mapping: every key becomes a
# column label and its list supplies that column's values, top to bottom.
data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'Score': [85, 90, 95],
}
df = pd.DataFrame(data=data)
df  # last expression in the cell, so the notebook renders the table

Unnamed: 0,Name,Age,Score
0,Alice,25,85
1,Bob,30,90
2,Charlie,35,95


In [62]:
# Build a DataFrame row by row: each inner list is one record. Lists carry
# no field names, so the column labels are passed separately.
data = [
    ['Alice', 25, 85],
    ['Bob', 30, 90],
    ['Charlie', 35, 95],
]
df = pd.DataFrame(data=data, columns=['Name', 'Age', 'Score'])
df

Unnamed: 0,Name,Age,Score
0,Alice,25,85
1,Bob,30,90
2,Charlie,35,95


In [63]:
# Wrap a 2-D NumPy array in a DataFrame, giving each array column a label.
array = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
df = pd.DataFrame(data=array, columns=list('ABC'))
df

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


### Reading Data

In [64]:
# The read calls below sit inside a triple-quoted string, so they are not
# executed. Because that string is the cell's only expression, the notebook
# echoes its repr as the cell output.
'''
# Reading CSV
df = pd.read_csv("data.csv")

# Reading Excel
df = pd.read_excel("data.xlsx")
'''

'\n# Reading CSV\ndf = pd.read_csv("data.csv")\n\n# Reading Excel\ndf = pd.read_excel("data.xlsx")\n'

### Data Exploration

In [65]:
# Rebuild the three-row sample frame so the cells that follow start from
# known contents.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    Score=[85, 90, 95],
)
df = pd.DataFrame(data)

In [66]:
print(df.head())  # First rows (up to 5)
print(df.tail())  # Last rows (up to 5)
# info() writes its summary to stdout itself and returns None, so it is
# called bare; wrapping it in print() would append a stray "None" line.
df.info()  # Summary of the DataFrame
print(df.describe())  # Statistical summary of the numeric columns
print(df.shape)  # (rows, columns)
print(df.columns)  # Column names
print(df.dtypes)  # Data types

      Name  Age  Score
0    Alice   25     85
1      Bob   30     90
2  Charlie   35     95
      Name  Age  Score
0    Alice   25     85
1      Bob   30     90
2  Charlie   35     95
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   Score   3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 204.0+ bytes
None
        Age  Score
count   3.0    3.0
mean   30.0   90.0
std     5.0    5.0
min    25.0   85.0
25%    27.5   87.5
50%    30.0   90.0
75%    32.5   92.5
max    35.0   95.0
(3, 3)
Index(['Name', 'Age', 'Score'], dtype='object')
Name     object
Age       int64
Score     int64
dtype: object


### Selecting and Filtering Data

In [67]:
# A single label selects one column and yields a Series
ages = df['Age']
print(ages)

# A list of labels yields a DataFrame holding just those columns
name_and_score = df[['Name', 'Score']]
print(name_and_score)

# A boolean mask keeps only the rows where the mask is True
older_than_30 = df['Age'] > 30
print(df[older_than_30])

# Masks combine element-wise with `&`; naming each comparison first avoids
# the parentheses an inline `(a > x) & (b > y)` expression needs
older_than_25 = df['Age'] > 25
scored_above_85 = df['Score'] > 85
print(df[older_than_25 & scored_above_85])

0    25
1    30
2    35
Name: Age, dtype: int64
      Name  Score
0    Alice     85
1      Bob     90
2  Charlie     95
      Name  Age  Score
2  Charlie   35     95
      Name  Age  Score
1      Bob   30     90
2  Charlie   35     95


### Modifying Data

In [68]:
# Add a boolean column derived from an existing one
passed_mask = df['Score'] > 85
df['Passed'] = passed_mask

# Overwrite a single value: the row mask picks Alice, the label picks Age
is_alice = df['Name'] == 'Alice'
df.loc[is_alice, 'Age'] = 26

# Remove the helper column again; drop() returns a new frame, so rebind df
df = df.drop(columns='Passed')

df

Unnamed: 0,Name,Age,Score
0,Alice,26,85
1,Bob,30,90
2,Charlie,35,95


### Grouping and Aggregation

In [69]:
# Group once on Age and reuse the grouping for both aggregations.
# numeric_only=True restricts the aggregation to numeric columns.
by_age = df.groupby('Age')
print(by_age.mean(numeric_only=True))  # Mean values grouped by Age
print(by_age.sum(numeric_only=True))  # Sum values grouped by Age

     Score
Age       
26    85.0
30    90.0
35    95.0
     Score
Age       
26      85
30      90
35      95


### Sorting Data

In [70]:
# Highest Score first. sort_values() returns a sorted frame; df is not
# reassigned here, so its row order is unchanged.
by_score_desc = df.sort_values('Score', ascending=False)
print(by_score_desc)

      Name  Age  Score
2  Charlie   35     95
1      Bob   30     90
0    Alice   26     85


### Handling Missing Data

In [71]:
# Count the missing values in each column
print(df.isnull().sum())

# Replace every missing value with 0. The result is assigned back to df
# rather than using inplace=True, which keeps the step explicit and
# chainable.
df = df.fillna(0)

# Drop any row that still contains a missing value. After the fill above
# none remain, so this line only demonstrates the call.
df = df.dropna()

Name     0
Age      0
Score    0
dtype: int64


### Merging and Joining Data

In [72]:
# Two frames that share an 'ID' key column: one holds names, one scores.
df1 = pd.DataFrame({
    'ID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie'],
})
df2 = pd.DataFrame({
    'ID': [1, 2, 3],
    'Score': [85, 90, 95],
})

# Join the rows whose 'ID' values match (merge defaults to an inner join).
merged_df = pd.merge(left=df1, right=df2, on='ID')

### Pivot Tables

In [73]:
# Mean Score for each distinct Age: Age becomes the index of the result.
pivot = df.pivot_table(
    index='Age',
    values='Score',
    aggfunc='mean',
)
pivot

Unnamed: 0_level_0,Score
Age,Unnamed: 1_level_1
26,85.0
30,90.0
35,95.0


### Working with Dates

In [74]:
# Store the dates as plain strings first, then convert the column to a
# datetime dtype so the `.dt` accessor becomes available.
iso_dates = ['2021-01-01', '2021-02-01', '2021-03-01']
df['Date'] = iso_dates
df['Date'] = pd.to_datetime(df['Date'])

dates = df['Date']
print(dates.dt.year)  # Extract year
print(dates.dt.month)  # Extract month

0    2021
1    2021
2    2021
Name: Date, dtype: int32
0    1
1    2
2    3
Name: Date, dtype: int32


### Applying Functions

In [75]:
def double(x):
    """Return ``x`` multiplied by 2."""
    result = x * 2
    return result

# Run double() on every Score value and write the result back to the same
# column. NOTE: this overwrites Score, so re-running the cell doubles the
# values again.
doubled_scores = df['Score'].apply(double)
df['Score'] = doubled_scores
df

Unnamed: 0,Name,Age,Score,Date
0,Alice,26,170,2021-01-01
1,Bob,30,180,2021-02-01
2,Charlie,35,190,2021-03-01


### Exporting Data

In [76]:
# The export calls below sit inside a triple-quoted string, so they are not
# executed and no files are written. Because that string is the cell's only
# expression, the notebook echoes its repr as the cell output.
'''
df.to_csv("output.csv", index=False)
df.to_excel("output.xlsx", index=False)
'''

'\ndf.to_csv("output.csv", index=False)\ndf.to_excel("output.xlsx", index=False)\n'

### Resetting and Setting Index

In [77]:
# Reset index: replace the current index with the default 0..n-1 range.
# drop=True discards the old index instead of keeping it as a column.
# The result is assigned back to df rather than using inplace=True.
df = df.reset_index(drop=True)

# Setting a column as index (shown for reference, not executed):
# df = df.set_index('Name')