In [120]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [143]:
class Insurance:
    """Exploratory-analysis helper wrapped around an insurance-claims table.

    The table is read from a CSV file once, at construction time, and kept on
    ``self.df``. Cleaning methods modify that frame; reporting methods print a
    summary or draw a plot.

    Some methods read fixed column names from the frame:
    ``total_claim_amount``, ``insured_sex``, ``fraud_reported`` and
    ``policy_state``.
    """

    def __init__(self, csv_path_file):
        """Load the CSV at ``csv_path_file`` into ``self.df``."""
        self.df = pd.read_csv(csv_path_file)

    def get_data(self):
        """Return the underlying DataFrame (the same object, not a copy)."""
        return self.df

    def show_info(self):
        """Print the frame summary and return per-column missing counts.

        Returns:
            A 2-tuple ``(info_result, null_counts)``. ``DataFrame.info()``
            prints its report and returns ``None``, so the first element is
            always ``None``; the second is ``self.df.isnull().sum()``.
        """
        return self.df.info(), self.df.isnull().sum()

    def remove_duplicates(self):
        """Drop fully duplicated rows and report how many were removed.

        Returns:
            A message string of the form ``"Removed N duplicate rows."``.
        """
        before = self.df.shape[0]
        # Kept in place on purpose: frames previously handed out by
        # get_data() are the same object and therefore see the de-duplication.
        self.df.drop_duplicates(inplace=True)
        after = self.df.shape[0]
        return f"Removed {before - after} duplicate rows."

    def handle_missing_values(self):
        """Impute missing values column by column.

        Object, string and categorical columns are filled with their most
        frequent value; every other column is filled with its median.
        Columns without missing values are left untouched, and so are columns
        for which no fill value exists (for example a column that is entirely
        missing), instead of raising.
        """
        types = pd.api.types
        for column in self.df.columns:
            series = self.df[column]
            if not series.isna().any():
                # Nothing to fill; also avoids computing a median/mode on
                # dtypes that do not support it.
                continue

            is_categorical = (
                types.is_object_dtype(series)
                or types.is_string_dtype(series)
                or isinstance(series.dtype, pd.CategoricalDtype)
            )
            if is_categorical:
                modes = series.mode()
                if modes.empty:
                    # Every value is missing, so there is no mode to use.
                    continue
                fill_value = modes.iloc[0]
            else:
                fill_value = series.median()
                if pd.isna(fill_value):
                    # Median of an all-missing column is itself missing.
                    continue

            self.df[column] = series.fillna(fill_value)

    def missing_row_count(self):
        """Return a Series with the number of missing values in each row."""
        return self.df.isnull().sum(axis=1)

    def detect_outliers(self, column):
        """Print how many rows of ``column`` fall outside the 1.5 * IQR fences.

        A row counts as an outlier when its value is below
        ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR``.
        """
        values = self.df[column]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        lower_fence = q1 - 1.5 * iqr
        upper_fence = q3 + 1.5 * iqr
        outliers = self.df[(values < lower_fence) | (values > upper_fence)]
        print(f"Outliers in '{column}': {len(outliers)}")

    def describe_numerical(self):
        """Return ``self.df.describe()`` (summary statistics of the frame)."""
        return self.df.describe()

    def average_claim_amount(self):
        """Print the mean of the ``total_claim_amount`` column."""
        print("Average Claim Amount:", self.df['total_claim_amount'].mean())

    def claim_distribution_by_gender(self):
        """Show a box plot of ``total_claim_amount`` grouped by ``insured_sex``."""
        sns.boxplot(x='insured_sex', y='total_claim_amount', data=self.df)
        plt.title('Claim Distribution by Gender')
        plt.show()

    def fraud_ratio(self):
        """Print the percentage share of each ``fraud_reported`` value."""
        ratio = self.df['fraud_reported'].value_counts(normalize=True) * 100
        print("Fraud Reported Ratio (%):\n", ratio)

    def correlation_matrix(self):
        """Show an annotated heatmap of correlations between numeric columns."""
        corr = self.df.select_dtypes(include=np.number).corr()
        plt.figure(figsize=(10, 8))
        sns.heatmap(corr, annot=True, cmap='coolwarm')
        plt.title("Correlation Matrix")
        plt.show()

    def claim_by_state(self):
        """Print the mean ``total_claim_amount`` for each ``policy_state``."""
        result = self.df.groupby('policy_state')['total_claim_amount'].mean()
        print("Average Claim Amount by State:\n", result)




    

In [144]:
# Build the analysis helper from the claims CSV; the relative path is
# resolved against the notebook's current working directory.
I1 = Insurance('insurance_claims.csv')

# Print the mean total_claim_amount for each policy_state.
I1.claim_by_state()

Average Claim Amount by State:
 policy_state
IL    52844.171598
IN    53006.967742
OH    52467.187500
Name: total_claim_amount, dtype: float64
