In [None]:
"""
Data Structures in Pandas
1. Series: A one-dimensional array that can hold any data type. It is similar to a column in a 
   spreadsheet or a single column in a SQL table.

2. DataFrame: A two-dimensional table with rows and columns. It is similar to a spreadsheet or a
   SQL table. Each column in a DataFrame is a Series.
"""

In [1]:
import pandas as pd

In [2]:
# Start from a DataFrame that has no rows and no columns,
# then print its text representation.
df = pd.DataFrame(data=None)
print(repr(df))

Empty DataFrame
Columns: []
Index: []


In [4]:
# Row-oriented input: every inner list is one (name, age) record.
data1 = [
    ['tom', 10],
    ['nick', 15],
    ['juli', 14],
]
# Column-oriented input: every key is a column label, every value that column's data.
data2 = dict(
    Name=['Tom', 'nick', 'krish', 'jack'],
    Age=[20, 21, 19, 18],
)

# A list of rows carries no labels, so the column names are passed explicitly;
# a dict already supplies them through its keys.
df1 = pd.DataFrame(data1, columns=['Name', 'Age'])
df2 = pd.DataFrame(data2)

# Show both frames with a divider line between them.
print(df1, "\n -------", df2, sep="\n")

   Name  Age
0   tom   10
1  nick   15
2  juli   14

 -------
    Name  Age
0    Tom   20
1   nick   21
2  krish   19
3   jack   18


In [11]:
# Select a single column; the result is a Series
age_column = df1["Age"]
print(age_column)
print("\n -------")

# Select a single row by its index label; the result is a Series keyed by column name
first_row = df1.loc[0]
print(first_row)
print("\n -------")

# Read one cell: row label 1, column 'Name'
element = df1.at[1, 'Name']
print(element)
print("\n -------")

# Build a boolean mask first, then keep only the rows where it is True
is_under_30 = df1['Age'] < 30
young_people = df1[is_under_30]
print(young_people)

0    10
1    15
2    14
Name: Age, dtype: int64

 -------
Name    tom
Age      10
Name: 0, dtype: object

 -------
nick


In [13]:
# Attach a boolean 'IsStudent' column, one flag per existing row
df1['IsStudent'] = [False, True, False]

# Discard that column again; drop() returns a new frame, so df1 is rebound to it
df1 = df1.drop(columns=['IsStudent'])


   Name  Age
0   tom   10
1  nick   15
2  juli   14


In [14]:
# Grouping by 'City' and calculating the mean age.
# `df` is the empty frame created earlier and df1/df2 only have 'Name' and
# 'Age', so grouping any of them by 'City' raises KeyError: 'City'.
# Use a small frame that actually contains a 'City' column instead.
city_people = pd.DataFrame({
    'Name': ['Tom', 'nick', 'krish', 'jack'],
    'Age': [20, 21, 19, 18],
    'City': ['Delhi', 'Delhi', 'Mumbai', 'Mumbai'],
})
# Result is a Series indexed by city, holding the mean 'Age' of each group.
average_age_by_city = city_people.groupby('City')['Age'].mean()

# Join the two frames on their shared 'Name' column
merged_df = df1.merge(df2, on='Name')

# Stack the two frames on top of each other (row-wise)
concatenated_df = pd.concat([df1, df2], axis='index')


In [None]:
"""
Handling Missing Data:
1. isnull(), notnull(): Check for missing or non-missing values.
2. fillna(): Fill missing values with a specified value or strategy.
3. dropna(): Remove rows or columns with missing values.
"""

# NOTE(review): as far as this notebook shows, `df` is still the empty frame
# created near the top, so both results below are empty too - confirm whether a
# frame with real missing values was intended here.

# Checking for missing values: same shape as df, True wherever a value is missing
missing_values = df.isnull()

# Filling missing values with the mean of each column.
# numeric_only=True limits the means to numeric columns; without it,
# pandas >= 2.0 raises TypeError as soon as the frame contains a text column
# (such as 'Name'). Columns without a computed mean are left unfilled.
df_filled = df.fillna(df.mean(numeric_only=True))

