# Categorical and Datetime Handling - Solutions

This notebook covers categorical data types, `pd.to_datetime()`, and datetime operations with the `.dt` accessor.

## Question 1
Create a DataFrame with a column of grades ['A', 'B', 'C', 'A', 'B'] and convert it to categorical data type. Display the categories and their codes.

In [None]:
import pandas as pd
import numpy as np

# Build the grades column and convert it to the categorical dtype in one step.
df_grades = pd.DataFrame(
    {'Grade': ['A', 'B', 'C', 'A', 'B']}
).astype({'Grade': 'category'})

# The .cat accessor exposes the category labels and the integer code per row.
grade_cat = df_grades['Grade'].cat
print("Categories:", grade_cat.categories)
print("Codes:", grade_cat.codes.tolist())
print("\nDataFrame:", df_grades, sep="\n")

## Question 2
Create a categorical column with ordered categories for sizes: ['Small', 'Medium', 'Large'] in that order. Add some data and sort by this categorical column.

In [None]:
# Declare the size ordering explicitly so that Small < Medium < Large.
size_dtype = pd.CategoricalDtype(
    categories=['Small', 'Medium', 'Large'],
    ordered=True,
)
sizes = ['Large', 'Small', 'Medium', 'Large', 'Small']
size_cat = pd.Categorical(sizes, dtype=size_dtype)
df_sizes = pd.DataFrame({'Size': size_cat, 'Price': [20, 15, 18, 22, 14]})
print("Original DataFrame:", df_sizes, sep="\n")

# Sorting follows the declared category order rather than alphabetical order.
by_size = df_sizes.sort_values(by='Size')
print("\nSorted by Size:", by_size, sep="\n")

## Question 3
Convert the string dates ['2023-01-15', '2023-02-20', '2023-03-10'] to datetime objects using pd.to_datetime().

In [None]:
date_strings = ['2023-01-15', '2023-02-20', '2023-03-10']

# Parse the whole list of date strings in a single call.
dates = pd.to_datetime(date_strings)

print("Original strings:", date_strings)
print("Converted to datetime:", dates, sep="\n")

# Inspect the type of a single parsed element.
first_date = dates[0]
print("Data type:", type(first_date))

## Question 4
Create a DataFrame with a datetime column and extract the year, month, and day of week using the .dt accessor.

In [None]:
date_values = pd.to_datetime(['2023-01-15', '2023-02-20', '2023-03-10', '2023-04-05'])

# Derive each component from the 'Date' column through the .dt accessor.
df_dates = pd.DataFrame({'Date': date_values}).assign(
    Year=lambda frame: frame['Date'].dt.year,
    Month=lambda frame: frame['Date'].dt.month,
    DayOfWeek=lambda frame: frame['Date'].dt.day_name(),
)
print(df_dates)

## Question 5
Create a date range using pd.date_range() for the first 10 days of 2023 with daily frequency.

In [None]:
# Ten consecutive daily timestamps starting on 1 January 2023.
date_range = pd.date_range('2023-01-01', periods=10, freq='D')
print("First 10 days of 2023:", date_range, sep="\n")

## Question 6
Given datetime strings in format 'dd/mm/yyyy', convert them to datetime objects and format them as 'yyyy-mm-dd'.

In [None]:
# Input is day/month/year; output is ISO-style year-month-day.
dmy_format = '%d/%m/%Y'
iso_format = '%Y-%m-%d'

date_strings_dmy = ['15/01/2023', '20/02/2023', '10/03/2023']
# An explicit format avoids any ambiguity between day-first and month-first.
dates_converted = pd.to_datetime(date_strings_dmy, format=dmy_format)
formatted_dates = dates_converted.strftime(iso_format)

print("Original format:", date_strings_dmy)
print("Converted to yyyy-mm-dd:", formatted_dates.tolist(), sep="\n")

## Question 7
Create a DataFrame with timestamps and filter for dates within a specific month (e.g., March 2023).

In [None]:
# Thirty daily timestamps starting 2023-02-15, paired with random integer values.
df_timestamps = pd.DataFrame({
    'Date': pd.date_range('2023-02-15', periods=30, freq='D'),
    'Value': np.random.randint(1, 100, 30)
})
print("Original DataFrame shape:", df_timestamps.shape)
print("\nFirst few rows:")
print(df_timestamps.head())

# Match on the year as well as the month: testing only `.dt.month == 3`
# would also select March rows from any other year present in the data.
in_march_2023 = (
    (df_timestamps['Date'].dt.year == 2023)
    & (df_timestamps['Date'].dt.month == 3)
)
march_data = df_timestamps[in_march_2023]
print("\nMarch 2023 data:")
print(march_data)

## Question 8
Calculate the difference in days between two datetime columns in a DataFrame.

In [None]:
start_dates = pd.to_datetime(['2023-01-01', '2023-02-15', '2023-03-10'])
end_dates = pd.to_datetime(['2023-01-15', '2023-03-01', '2023-04-05'])
df_dates_diff = pd.DataFrame({'Start_Date': start_dates, 'End_Date': end_dates})

# Subtracting the two datetime columns gives timedeltas; .dt.days keeps the
# whole-day component of each one.
elapsed = df_dates_diff['End_Date'].sub(df_dates_diff['Start_Date'])
df_dates_diff['Days_Difference'] = elapsed.dt.days
print(df_dates_diff)

## Question 9
Resample time series data with daily frequency to monthly frequency using mean aggregation.

In [None]:
# Ninety daily observations starting 2023-01-01, indexed by date so that
# the frame can be resampled.
df_timeseries = pd.DataFrame({
    'Date': pd.date_range('2023-01-01', periods=90, freq='D'),
    'Value': np.random.randint(10, 100, 90)
})
df_timeseries.set_index('Date', inplace=True)
print("Original data (first 5 rows):")
print(df_timeseries.head())

# Use an explicit MonthEnd offset instead of the 'M' string alias: the alias
# is deprecated in recent pandas releases (renamed 'ME'), while the offset
# object is accepted by both older and newer versions.
monthly_avg = df_timeseries.resample(pd.offsets.MonthEnd()).mean()
print("\nMonthly averages:")
print(monthly_avg)

## Question 10
Create a categorical column for seasons based on months (Dec-Feb: Winter, Mar-May: Spring, Jun-Aug: Summer, Sep-Nov: Fall) using datetime data.

In [None]:
# One month-end timestamp per month of 2023. An explicit MonthEnd offset is
# used instead of the 'M' string alias, which is deprecated in recent pandas
# releases (renamed 'ME'); the offset object works on old and new versions.
df_seasons = pd.DataFrame({
    'Date': pd.date_range('2023-01-01', periods=12, freq=pd.offsets.MonthEnd())
})

def get_season(month):
    """Return the season name for a month number.

    Months 12, 1 and 2 map to 'Winter', 3-5 to 'Spring' and 6-8 to 'Summer'.
    Any other value falls through to 'Fall'.
    """
    season_months = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season, months in season_months:
        if month in months:
            return season
    # Everything not matched above is treated as Fall.
    return 'Fall'

# Label each row with its season, then store the labels as a categorical column.
season_labels = df_seasons['Date'].dt.month.apply(get_season)
df_seasons['Season'] = season_labels.astype('category')
print(df_seasons)

season_names = df_seasons['Season'].cat.categories.tolist()
print("\nSeason categories:", season_names)