# Pandas Basics

This notebook covers:
- Series and DataFrames
- Creating and inspecting DataFrames
- Reading CSV files
- Selecting columns and rows
- Filtering data
- Using `.loc` and `.iloc`

In [None]:
import pandas as pd

In [None]:
# Create a Series
# The values and their index labels live in two separate lists and are
# paired up by position when the Series is built.
series_values = [10, 20, 30]
series_labels = ['a', 'b', 'c']
s = pd.Series(data=series_values, index=series_labels)
print("Series:\n", s)

In [None]:
# Create a DataFrame from a dict that maps each column name to its values
data = {
    'City': ['Lhr', 'Fsd', 'Isb'],
    'Population': [8.9, 2.1, 3.6]
}

df = pd.DataFrame(data)
print("DataFrame:\n", df)

# head(1) / tail(1) return the first / last row as a one-row DataFrame
print("\nTop row:\n", df.head(1))
print("\nBottom row:\n", df.tail(1))

# df.info() writes its report straight to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
print("\nInfo:\n")
df.info()

# describe() returns a DataFrame of summary statistics
print("\nDescribe:\n", df.describe())

In [None]:
# Load Data from CSV
# Sample table as CSV text: a header line followed by one line per person.
csv = (
    "Name,Age,Score\n"
    "Alice,24,95.5\n"
    "Bob,27,88.0\n"
    "Charlie,22,76.2\n"
)

# Write the text to disk so it can be read back like any other CSV file.
with open('sample_data.csv', 'w') as out_file:
    out_file.write(csv)

# Load CSV into DataFrame
df_data = pd.read_csv('sample_data.csv')
print("DataFrame loaded from CSV:\n", df_data)

In [None]:
# Professional CSV Load
# A one-row file whose fields are separated by ';' rather than ','.
csv_content_semicolon = "Name;Age;Score\nDavid;30;99.9\n"
with open('semicolon_data.csv', 'w') as out_file:
    out_file.write(csv_content_semicolon)

# State the separator explicitly and fix the dtype of 'Score' at read time.
df_prof = pd.read_csv(
    'semicolon_data.csv',
    sep=';',
    dtype={'Score': 'float32'},
)
print("Professional CSV load:\n", df_prof)

score_dtype = df_prof['Score'].dtype
print("Score dtype:", score_dtype)

In [None]:
# One column label inside the brackets gives that column back as a Series
ages = df_data['Age']
print("Ages (Series):\n", ages)

# A list of labels inside the brackets gives back a DataFrame holding
# just those columns, in the order listed
selected_columns = ['Name', 'Score']
subset = df_data[selected_columns]
print("\nName and Score (DataFrame):\n", subset)

In [None]:
# Boolean mask: True for each row whose Age is greater than 25
# (.gt(25) is the method spelling of the `> 25` comparison)
condition = df_data['Age'].gt(25)
print("Condition (Age > 25):\n", condition)

# Selecting with the mask keeps only the rows where it is True
filtered_df = df_data.loc[condition]
print("\nFiltered Data (Age > 25):\n", filtered_df)

In [None]:
# Custom Indexing
# Replace the existing index with string row labels. The assignment changes
# df_data itself, so everything that runs afterwards sees these labels.
row_labels = ['R1', 'R2', 'R3']
df_data.index = row_labels
print("DataFrame with Custom Index:\n", df_data)

# .loc with a single row label -> that row
row_r2 = df_data.loc['R2']
print("Row R2:\n", row_r2)

# .at with (row label, column label) -> a single value
score_r3 = df_data.at['R3', 'Score']
print("Score of R3:", score_r3)

# .loc with a list of row labels -> those rows
rows_r1_r3 = df_data.loc[['R1', 'R3']]
print("Rows R1 and R3:\n", rows_r1_r3)

# Row-label list plus one column label; .values exposes the underlying array
scores_r1_r2 = df_data.loc[['R1', 'R2'], 'Score'].values
print("Scores of R1 and R2:", scores_r1_r2)

# Row-label list plus column-label list -> the sub-table where they intersect
name_age_r1_r3 = df_data.loc[['R1', 'R3'], ['Name', 'Age']]
print("Subset (R1 and R3, Name and Age):\n", name_age_r1_r3)