In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
from pandas.tseries.frequencies import unique_deltas
# Load the data into a pandas dataframe. The untouched frame is kept under its
# own name so the pre-cleaning state stays available for inspection.
raw_data = pd.read_csv("data.csv")

# Initialise a dictionary to store the results of the data quality checks
results = {}

# Record missing-value and duplicate-row counts BEFORE cleaning. The checks
# further down in this cell run on `data`, i.e. after dropna() and
# drop_duplicates(), so they cannot report what the file originally contained.
results["raw_missing_values"] = raw_data.isnull().sum().sum()
results["raw_duplicates"] = raw_data.duplicated().sum()

# Perform basic data cleaning tasks. Each step returns a new frame, so
# `raw_data` is never mutated.
data = (
    raw_data
    .dropna()
    .drop_duplicates()
    # NOTE(review): 'column_name' looks like a placeholder — confirm which
    # column is meant to be cast to int32.
    .astype({'column_name': 'int32'})
)

# Implement data quality checks

# Duplicate check: count the rows that DataFrame.duplicated() flags.
# NOTE(review): drop_duplicates() has already been applied to `data` earlier in
# this cell, so this count describes the cleaned frame — confirm that is intended.
duplicates = results["duplicates"] = data.duplicated().sum()

# Range check (for numeric data)
# Bounds every numeric column is expected to stay within (both ends inclusive).
RANGE_LOWER, RANGE_UPPER = 0, 100

# Select every numeric dtype instead of listing 'int32' and 'float64' only:
# pd.read_csv produces int64 for integer columns by default, so an explicit
# dtype list would let any integer column that was not cast skip this check.
numeric_columns = data.select_dtypes(include="number").columns
for column in numeric_columns:
    column_min = data[column].min()
    column_max = data[column].max()
    # A column fails as soon as either extreme leaves the allowed interval.
    out_of_range = (column_min < RANGE_LOWER) or (column_max > RANGE_UPPER)
    results[column + "_range"] = "Out of range" if out_of_range else "Within range"

# Value check (for categorical data)
# Largest number of distinct values a column may hold before it is flagged.
MAX_UNIQUE_VALUES = 10

# `.columns` must be read from the frame returned by select_dtypes(); the
# include list itself is a plain Python list and has no such attribute.
categorical_columns = data.select_dtypes(include=['object']).columns
for column in categorical_columns:
    unique_values = data[column].nunique()
    # NOTE(review): the "_range" key suffix is kept so existing report labels do
    # not change, although this check is about cardinality — consider renaming.
    if unique_values > MAX_UNIQUE_VALUES:
        results[column + "_range"] = "Too many unique values"
    else:
        results[column + "_range"] = "Acceptable number of unique values"

# Missing values check: total number of null cells across the whole frame.
# NOTE(review): dropna() has already been applied to `data` earlier in this cell,
# so this count is taken on the cleaned frame — confirm that is intended.
missing_values = data.isna().sum().sum()
results.update(missing_values=missing_values)

# Store the results in a report or a summary table
results_df = pd.DataFrame.from_dict(results, orient='index', columns=['Result'])
print(results_df)

# Visualise the results to help identify patterns and trends in the data quality.
# The "Result" column mixes integer counts with status strings such as
# "Out of range"; only the counts can serve as bar heights, so the strings are
# coerced to NaN and dropped from the chart (they remain in the printed table).
numeric_results = pd.to_numeric(results_df["Result"], errors="coerce").dropna()

fig, ax = plt.subplots()
ax.bar(numeric_results.index, numeric_results.to_numpy())
ax.set(title="Data quality checks (count-based results)", xlabel="Check", ylabel="Count")
ax.tick_params(axis="x", labelrotation=90)
plt.show()