In [1]:
# Imports: standard library first, then third-party packages
import sys
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Silence every warning category for the rest of the kernel session. This
# keeps cell output tidy, but it also hides deprecation and dtype warnings.
warnings.filterwarnings('ignore')

# Set style for plots
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Make the project's source directory importable. The entry is a relative
# path, so it is resolved against the kernel's working directory. The
# membership check keeps re-runs of this cell from appending duplicate
# entries to sys.path.
SRC_DIR = '../src'
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Import our custom data loader. It is found via SRC_DIR, so this import has
# to stay below the sys.path update.
from data.data_loader import InsuranceDataLoader

# File locations used by this cell (relative to the notebook's working directory)
RAW_DATA_PATH = '../data/raw/MachineLearningRating_v3.txt'
PROCESSED_CSV_PATH = '../data/processed/insurance_data.csv'

# Read the raw text export through the project loader.
# `raw_data` is not used again in this cell; it is kept in case later cells need it.
data_loader = InsuranceDataLoader(RAW_DATA_PATH)
raw_data = data_loader.load_raw_data()

# Persist a CSV copy of the loaded data
data_loader.save_as_csv(PROCESSED_CSV_PATH)

# Summarise the dataset: its shape and memory footprint
data_info = data_loader.get_data_info()
print("Dataset Information:")
print(f"Shape: {data_info['shape']}")
# Presumably 'memory_usage' is reported in bytes (it is scaled by 1024**2 and
# labelled MB) -- confirm against InsuranceDataLoader.get_data_info.
memory_mb = data_info['memory_usage'] / 1024**2
print(f"Memory Usage: {memory_mb:.2f} MB")

# Run the loader's basic preprocessing step; see InsuranceDataLoader for what
# it actually changes.
df = data_loader.basic_preprocessing()

# Preview the first rows; as the last expression of the cell, the frame is
# rendered by the notebook.
print("\nFirst 5 rows:")
df.head(5)

INFO:data.data_loader:Loading data from ..\data\raw\MachineLearningRating_v3.txt
INFO:data.data_loader:Successfully loaded 1000098 rows and 52 columns
INFO:data.data_loader:Data saved as CSV to ..\data\processed\insurance_data.csv
INFO:data.data_loader:Starting basic preprocessing...


Dataset Information:
Shape: (1000098, 52)
Memory Usage: 2124.34 MB


INFO:data.data_loader:Basic preprocessing completed



First 5 rows:


Unnamed: 0,UnderwrittenCoverID,PolicyID,TransactionMonth,IsVATRegistered,Citizenship,LegalType,Title,Language,Bank,AccountType,...,ExcessSelected,CoverCategory,CoverType,CoverGroup,Section,Product,StatutoryClass,StatutoryRiskType,TotalPremium,TotalClaims
0,145249,12827,2015-03-01,True,,Close Corporation,Mr,English,First National Bank,Current account,...,Mobility - Windscreen,Windscreen,Windscreen,Comprehensive - Taxi,Motor Comprehensive,Mobility Metered Taxis: Monthly,Commercial,IFRS Constant,21.929825,0.0
1,145249,12827,2015-05-01,True,,Close Corporation,Mr,English,First National Bank,Current account,...,Mobility - Windscreen,Windscreen,Windscreen,Comprehensive - Taxi,Motor Comprehensive,Mobility Metered Taxis: Monthly,Commercial,IFRS Constant,21.929825,0.0
2,145249,12827,2015-07-01,True,,Close Corporation,Mr,English,First National Bank,Current account,...,Mobility - Windscreen,Windscreen,Windscreen,Comprehensive - Taxi,Motor Comprehensive,Mobility Metered Taxis: Monthly,Commercial,IFRS Constant,0.0,0.0
3,145255,12827,2015-05-01,True,,Close Corporation,Mr,English,First National Bank,Current account,...,Mobility - Metered Taxis - R2000,Own damage,Own Damage,Comprehensive - Taxi,Motor Comprehensive,Mobility Metered Taxis: Monthly,Commercial,IFRS Constant,512.84807,0.0
4,145255,12827,2015-07-01,True,,Close Corporation,Mr,English,First National Bank,Current account,...,Mobility - Metered Taxis - R2000,Own damage,Own Damage,Comprehensive - Taxi,Motor Comprehensive,Mobility Metered Taxis: Monthly,Commercial,IFRS Constant,0.0,0.0
