In [8]:
import numpy as np
import pandas as pd
import math

# Raw observations: seven readings for each labelled row.
data_rows = dict(
    Row1=[27.3, 92.9, -3.7, -57.7, 27.3, 21.4, 23.53],
    Row2=[20.4, -18.2, -40.9, -57.0, 173.3, -7.6, -25.51],
    Row3=[-2.6, 36.9, -29.6, -84.5, 473.3, -7.8, -26.05],
)

# One DataFrame row per dict key; the columns are the positions 0..6.
df = pd.DataFrame.from_dict(data_rows, orient='index')

# For every row, the single reading that is treated as its outlier.
outliers = dict(
    Row1=92.9,
    Row2=173.3,
    Row3=473.3,
)

# Function to round down (towards negative infinity) to a number of decimal places
def round_down(value, decimals=2):
    """Floor ``value`` to ``decimals`` decimal places and return a float.

    The floor is taken on the shortest decimal representation of the float
    (its ``repr``) rather than on ``value * 10 ** decimals``.  The binary
    multiplication can land just below the intended integer
    (``0.29 * 100 == 28.999999999999996``), which made the previous
    implementation return 0.28 for an input of 0.29.

    Args:
        value: Any real number accepted by ``float()``.
        decimals: Number of decimal places to keep.  Negative values floor to
            tens, hundreds, ...  Non-integer values are truncated with ``int()``.

    Returns:
        float: The largest multiple of ``10 ** -decimals`` that is <= ``value``.

    Raises:
        ValueError: If ``value`` is NaN.
        OverflowError: If ``value`` is infinite.
    """
    from decimal import ROUND_FLOOR, Decimal, localcontext

    number = float(value)
    if not math.isfinite(number):
        # Non-finite input cannot be floored; math.floor raises the same
        # ValueError / OverflowError that callers saw before.
        return math.floor(number * 10.0 ** decimals) / 10.0 ** decimals

    places = int(decimals)
    exact = Decimal(repr(number))
    with localcontext() as ctx:
        # quantize() fails if the result needs more digits than the context
        # precision, so widen it enough for very large or very small floats.
        ctx.prec = max(ctx.prec, abs(exact.adjusted()) + abs(places) + 20)
        floored = exact.quantize(Decimal(1).scaleb(-places), rounding=ROUND_FLOOR)
    return float(floored)

# Swap each listed outlier for the floored mean of its own row.
# The mean is taken over the complete row, so the outlier itself still
# contributes to the replacement value.
for label, extreme in outliers.items():
    readings = df.loc[label]

    # Mean of all readings in the row, floored to 2 decimal places
    substitute = round_down(np.mean(readings), 2)

    # Write the row back with the outlier exchanged for the substitute
    df.loc[label] = readings.replace(extreme, substitute)


# Show the frame after the replacements
print(df)

         0      1     2     3      4     5      6
Row1  27.3  18.71  -3.7 -57.7  27.30  21.4  23.53
Row2  20.4 -18.20 -40.9 -57.0   6.35  -7.6 -25.51
Row3  -2.6  36.90 -29.6 -84.5  51.37  -7.8 -26.05


ModuleNotFoundError: No module named 'dataRequest'

In [11]:


def identify_outliers_data(outliers: pd.DataFrame):
    """Return the rows of ``outliers`` that contain at least one IQR outlier.

    Every column except 'CrimeCategory', 'ProvinceCode', 'PoliceStationCode'
    and 'Quarter' is treated as a percentage column and coerced to numeric
    (unparseable values become NaN).  Within each row, a value is an outlier
    when it lies below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR`` of that
    row's percentage values.

    Args:
        outliers: The input table, or anything ``pd.DataFrame`` can wrap.
            It is copied, so the caller's object is never modified.

    Returns:
        pd.DataFrame: One row per input row that has outliers, carrying the
        (numerically coerced) input columns plus an 'Outliers' entry holding
        the array of outlying values.  An empty DataFrame is returned when the
        input is empty, when no row has outliers, or when processing fails.
    """
    id_columns = ['CrimeCategory', 'ProvinceCode', 'PoliceStationCode', 'Quarter']

    try:
        # Copy first: the coercion below assigns columns and must not write
        # through to the frame the caller handed in.
        df = pd.DataFrame(outliers).copy()

        # Check if the data is in the expected format
        if df.empty:
            print("Unexpected data format received from the API")
            return pd.DataFrame()  # Return an empty DataFrame if the data format is unexpected

        # Convert 'Percentage' columns to numeric
        percentage_cols = [col for col in df.columns if col not in id_columns]
        df[percentage_cols] = df[percentage_cols].apply(pd.to_numeric, errors='coerce')

        # Identify outliers per row
        outliers_list = []

        for _, row in df.iterrows():
            # iterrows yields object-dtype rows when identifier columns are
            # strings; cast back to float so quantiles and comparisons are numeric.
            row_data = row[percentage_cols].astype(float)
            Q1 = row_data.quantile(0.25)
            Q3 = row_data.quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR

            row_outliers = row_data[(row_data < lower_bound) | (row_data > upper_bound)]

            if not row_outliers.empty:
                outlier_row = row.copy()
                outlier_row['Outliers'] = row_outliers.values
                outliers_list.append(outlier_row)

        return pd.DataFrame(outliers_list)

    except Exception as e:
        # Best-effort contract: report the problem and hand back an empty
        # frame (previously this path fell through and returned None).
        print(f"Error: {e}")
        return pd.DataFrame()


In [12]:


def replace_outliers_data(outliers: pd.DataFrame):
    """Replace per-row IQR outliers with the mean of the row's other values.

    All columns after the first four are treated as percentage columns and
    coerced to numeric (unparseable values become NaN), matching
    ``identify_outliers_data``.  Within each row, values below
    ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR`` are replaced by the mean of
    the row's non-outlying values, rounded to 2 decimal places.

    Args:
        outliers: The input table, or anything ``pd.DataFrame`` can wrap.
            It is copied, so the caller's object is never modified.

    Returns:
        pd.DataFrame: A copy of the input with outliers replaced.  An empty
        DataFrame is returned when the input is empty; a frame with four or
        fewer columns is returned unchanged.
    """
    # Copy first: the column assignment below must not write through to the
    # frame the caller handed in.
    df = pd.DataFrame(outliers).copy()

    # Check if the data is in the expected format
    if df.empty:
        print("Unexpected data format received from the API")
        return pd.DataFrame()

    # Select only the percentage columns (everything after the first four)
    percentage_columns = df.columns[4:]
    if len(percentage_columns) == 0:
        # Nothing to inspect, so there is nothing to replace.
        return df

    values = df[percentage_columns].apply(pd.to_numeric, errors='coerce')

    # Row-wise quartiles and IQR fences
    q1 = values.quantile(0.25, axis=1)
    q3 = values.quantile(0.75, axis=1)
    iqr = q3 - q1
    outlier_mask = (
        values.lt(q1 - 1.5 * iqr, axis=0) | values.gt(q3 + 1.5 * iqr, axis=0)
    )

    # Mean of the non-outlying values of each row, rounded to 2 decimals.
    # Built with mask() instead of mutating rows inside apply(), which pandas
    # does not support.
    row_means = values.mask(outlier_mask).mean(axis=1).round(2)
    df[percentage_columns] = values.mask(outlier_mask, row_means, axis=0)

    return df
