# Pandas Basics Portfolio
This notebook explores core pandas functionality with theoretical notes and practical code examples.

## DataFrame Creation and CSV I/O
Load the portfolio CSVs into pandas DataFrames.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
# Directory holding the portfolio CSVs, relative to the notebook's working
# directory. Change it here rather than editing every read_csv call.
DATA_DIR = Path('data')

customers = pd.read_csv(DATA_DIR / 'customers.csv')
orders = pd.read_csv(DATA_DIR / 'orders.csv')
products = pd.read_csv(DATA_DIR / 'products.csv')

# Untouched snapshot of the orders table as loaded. Later cells enrich
# `orders` with extra columns, while the merge examples work from this copy.
orders_raw = orders.copy()

products.head()

## Column and Row Selection
Use indexing, `.loc`, `.iloc`, and boolean masks to select data.

In [None]:
# Two-column projection of the customer table.
name_city = customers.loc[:, ['name', 'city']]

# Boolean mask: keep only the Houston rows, then renumber them from zero.
is_houston = customers['city'] == 'Houston'
houston_customers = customers.loc[is_houston].reset_index(drop=True)

# First three orders by position (not by index label).
first_three = orders.iloc[0:3]
first_three

## Add/Transform Columns and Apply
Create new columns and apply functions column-wise and row-wise.

In [None]:
# Multiplier applied to `price` to obtain `price_eur`.
# NOTE(review): presumably a fixed currency conversion rate; confirm the source currency.
EUR_CONVERSION_RATE = 0.92

# Column-wise with vectorized operations
products['price_eur'] = products['price'] * EUR_CONVERSION_RATE

# Attach each order's unit price and category.
# The merge starts from `orders_raw` (not `orders`) so that re-running this
# cell is idempotent: merging the already-enriched frame again would create
# `price_x` / `price_y` columns and break the `r['price']` lookup below.
orders = orders_raw.merge(
    products[['product_id', 'price', 'category']],
    on='product_id',
    how='left',  # keep every order, even if its product is missing from `products`
)

# Row-wise apply to compute total price per order.
# (Equivalent to the vectorized `orders['quantity'] * orders['price']`;
# kept as `apply` here to demonstrate the row-wise form.)
orders['total_price'] = orders.apply(lambda r: r['quantity'] * r['price'], axis=1)
orders.head()

## Renaming and GroupBy
Rename columns and aggregate data, including custom percentiles.

In [None]:
def _pct90(values):
    """Return the 90th percentile of a Series."""
    return values.quantile(0.9)


# Copy of the orders table with `quantity` exposed under the shorter name `qty`.
orders_renamed = orders.rename(columns={'quantity': 'qty'})

# Per-category price summary: mean, median and the custom 90th percentile.
price_pct = (
    products
    .groupby('category')['price']
    .agg(mean='mean', median='median', pct90=_pct90)
)
price_pct

## Multi-key GroupBy and Pivot Tables
Group by multiple keys and reshape data with pivot tables.

In [None]:
# Total quantity ordered for every (customer, product) pair, as a Series
# with a two-level index.
multi = orders.groupby(['customer_id', 'product_id'])['quantity'].sum()

# The same totals reshaped wide: one row per customer, one column per product.
# `aggfunc` is given explicitly because pivot_table defaults to the *mean*,
# which would disagree with the groupby above; with sums, `fill_value=0`
# correctly reads as "this customer ordered none of this product".
pivot = orders.pivot_table(
    values='quantity',
    index='customer_id',
    columns='product_id',
    aggfunc='sum',
    fill_value=0,
)

# Show only the top-left corner to keep the output small.
pivot.iloc[:5, :5]

## Merging and Concatenation
Combine DataFrames using various join strategies and concatenation.

In [None]:
# The four join strategies on the shared `customer_id` key.
inner, left, right, outer = (
    customers.merge(orders_raw, on='customer_id', how=strategy)
    for strategy in ('inner', 'left', 'right', 'outer')
)

# Join on differently named key columns; any other column names the two
# frames share are disambiguated with the given suffixes.
prod_renamed = products.rename(columns={'product_id': 'id'})
custom_merge = orders_raw.merge(
    prod_renamed,
    left_on='product_id',
    right_on='id',
    how='inner',
    suffixes=('_order', '_prod'),
)

# Stack the first two and last two raw orders, discarding their original
# index in favour of a fresh one.
concat_orders = pd.concat(
    [orders_raw.iloc[:2], orders_raw.iloc[-2:]],
    ignore_index=True,
)
concat_orders

## Variable Types and Dtypes
Inspect column dtypes and convert them explicitly: numeric identifiers to integers, category labels to the `category` dtype.

In [None]:
# Store the order identifier as a 64-bit integer.
orders = orders.astype({'order_id': 'int64'})

# Store the product category with pandas' categorical dtype.
products = products.astype({'category': 'category'})

# Resulting dtype of every column in the orders table.
orders.dtypes

## Ordered Categoricals and One-Hot Encoding
Create ordered categoricals and dummy variables.

In [None]:
# Declared category ranking; with ordered=True this means A < B < C < D.
CATEGORY_ORDER = ['A', 'B', 'C', 'D']

# pd.Categorical silently converts any label that is not listed in
# `categories` to NaN, so verify up front that no product would lose its
# category. Missing values already present in the column are ignored.
unexpected = set(products['category'].dropna().unique()) - set(CATEGORY_ORDER)
assert not unexpected, (
    f"Categories missing from CATEGORY_ORDER: {sorted(map(str, unexpected))}"
)

products['category'] = pd.Categorical(
    products['category'],
    categories=CATEGORY_ORDER,
    ordered=True,
)

# One indicator column per declared category (including categories that
# happen to have no products).
category_dummies = pd.get_dummies(products['category'])
category_dummies.head()

## Replace Usage
Standardize values with `replace`.

In [None]:
# Full city name -> abbreviation; cities not listed here are left unchanged.
city_abbreviations = {'New York': 'NY', 'Los Angeles': 'LA'}

customers = customers.assign(city=customers['city'].replace(city_abbreviations))
customers.head()

## Data Summaries and Visualizations
Compute summary statistics and visualize distributions.

In [None]:
# Numeric summary of order quantities. Only the last expression of a cell is
# rendered automatically, so this mid-cell result must be displayed
# explicitly (`display` is provided by the Jupyter/IPython kernel).
display(orders['quantity'].describe())

# Box plot of order quantities. Missing values are dropped first because
# matplotlib cannot compute box statistics from data containing NaN.
fig, ax = plt.subplots()
ax.boxplot(orders['quantity'].dropna())
ax.set(title='Order quantity', ylabel='Quantity')
plt.show()

# Histogram of product prices (NaN would make the bin range undefined).
fig, ax = plt.subplots()
ax.hist(products['price'].dropna(), bins=20)
ax.set(title='Product price distribution', xlabel='Price', ylabel='Number of products')
plt.show()

# Count products per category once and reuse the result for both charts.
category_counts = products['category'].value_counts()

fig, ax = plt.subplots()
category_counts.plot(kind='bar', ax=ax)
ax.set(title='Products per category', xlabel='Category', ylabel='Number of products')
plt.show()

fig, ax = plt.subplots()
category_counts.plot(kind='pie', ax=ax)
ax.set(title='Share of products per category', ylabel='')
plt.show()

## Value Counts and Proportions
Compute counts and proportions of categorical variables.

In [None]:
# Relative frequency of each city among customers (values sum to 1).
city_column = customers['city']
city_props = city_column.value_counts(normalize=True)
city_props

## Associations
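Explore relationships between pairs of variables: quantitative vs. categorical, quantitative vs. quantitative, and categorical vs. categorical.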

In [None]:
# --- Quantitative vs Categorical: price by product category ---
products.boxplot(column='price', by='category')
plt.show()

# Overlaid, density-normalised price histograms for categories A and B.
fig, ax = plt.subplots()
for label in ('A', 'B'):
    products.loc[products['category'] == label, 'price'].plot(
        kind='hist', density=True, alpha=0.5, ax=ax, label=f'Category {label}',
    )
ax.set(title='Price distribution: category A vs B', xlabel='Price')
ax.legend()
plt.show()

# --- Quantitative vs Quantitative: quantity against total price ---
fig, ax = plt.subplots()
ax.scatter(orders['quantity'], orders['total_price'])
ax.set(title='Quantity vs total price', xlabel='Quantity', ylabel='Total Price')
plt.show()

# Only the last expression of a cell is rendered automatically, so the
# intermediate results are displayed explicitly (`display` is provided by the
# Jupyter/IPython kernel).
display(orders[['quantity', 'total_price']].cov())

# `total_price` is NaN for orders whose product was not found by the left
# merge; drop incomplete rows so the correlation is computed on valid pairs
# instead of propagating NaN.
qty_total = orders[['quantity', 'total_price']].dropna()
pearson_r, p_val = stats.pearsonr(qty_total['quantity'], qty_total['total_price'])
display(pd.Series({'pearson_r': pearson_r, 'p_value': p_val}, name='quantity vs total_price'))

# --- Categorical vs Categorical: customer city against product category ---
cust_orders = pd.merge(customers, orders, on='customer_id', how='inner')
cont = pd.crosstab(cust_orders['city'], cust_orders['category'])

# Chi-square test of independence on the contingency table; `exp` holds the
# expected frequencies under independence.
chi2, p, dof, exp = stats.chi2_contingency(cont)
display(pd.Series({'chi2': chi2, 'p_value': p, 'dof': dof}, name='city vs category'))

cont

## Reusable EDA Helpers
Helper functions for quick summaries.

In [None]:
def value_props(s, dropna=True):
    """Tabulate how often each distinct value occurs in a Series.

    Parameters
    ----------
    s : pandas.Series
        Values to tabulate.
    dropna : bool, default True
        If True, missing values are ignored. If False, they are counted as
        their own category.

    Returns
    -------
    pandas.DataFrame
        Indexed by distinct value, sorted by descending frequency, with
        columns ``freq`` (count) and ``prop`` (share of the counted values).
        ``prop`` always sums to 1 for non-empty input, matching
        ``Series.value_counts(normalize=True)``.
    """
    counts = s.value_counts(dropna=dropna)
    # Divide by the number of values actually counted rather than len(s):
    # with len(s), ignored NaNs would still inflate the denominator and the
    # proportions would no longer sum to 1.
    return pd.DataFrame({'freq': counts, 'prop': counts / counts.sum()})

def quick_corr(df, method='pearson'):
    """Return the pairwise correlation matrix of a DataFrame's numeric columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. Columns that are neither numeric nor boolean are
        ignored, so the helper can be pointed at a mixed-type table.
    method : str or callable, default 'pearson'
        Correlation method, passed through to ``DataFrame.corr``.

    Returns
    -------
    pandas.DataFrame
        Square correlation matrix over the retained columns.
    """
    # Recent pandas versions raise if corr() meets a non-numeric column, so
    # select the usable columns explicitly instead of relying on the default.
    numeric = df.select_dtypes(include=['number', 'bool'])
    return numeric.corr(method=method)

# Only the last expression of a cell is rendered automatically, so the first
# result is displayed explicitly (`display` is provided by the Jupyter/IPython
# kernel); otherwise the city table would be computed and silently discarded.
display(value_props(customers['city']).head())
quick_corr(orders[['quantity', 'total_price']])