In [None]:
import json
import csv
import polars as pl
from config import PATH_TO_CSV

In [None]:
# Load the raw export; the path comes from the project's config module.
df = pl.read_csv(source=PATH_TO_CSV)

In [None]:
# Keep every column except those whose name ends in "_<digits>".
# polars treats a selector string wrapped in ^...$ as a regex.
numbered_suffix_pattern = "^.*_[0-9]+$"
df = df.select(pl.all().exclude(numbered_suffix_pattern))
df.columns

In [None]:
# Columns removed from the frame before it is written out.
columns_to_drop = [
    'id',
    'user_id',
    'photo_left',
    'photo_right',
    'photo_full_face',
    'username',
    'chat_id',
    'date_created',
    'question_old',
    'question_allergen',
    'question_medicines',
    'question_skin_type',
]
df = df.drop(columns_to_drop)

In [None]:
# Persist the trimmed frame; later cells re-read this file with the csv module.
df.write_csv(file="data.csv", separator=",")

In [None]:
# Parse the detection results: a mapping of image key -> list of entries.
with open("acne_detection_results.json") as f:
    data = json.load(f)

# For every image, add up the lengths of all "class_probs" lists found in its
# entries; entries without that key contribute nothing.
# NOTE(review): this counts the elements of each "class_probs" list, not the
# number of entries — confirm one element corresponds to one pimple.
counts = {
    key: sum(len(item["class_probs"]) for item in value if "class_probs" in item)
    for key, value in data.items()
}

# Show the per-image totals.
for image_name, count in counts.items():
    print(f"Image: {image_name}, Total count of pimples: {count}")

# Save the per-image totals as a two-column CSV.
with open("summary_file.csv", "w", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["image_name", "total_pimples"])
    writer.writeheader()
    writer.writerows(
        {"image_name": name, "total_pimples": total}
        for name, total in counts.items()
    )

In [None]:
# Aggregate the per-image summary by the part of the image name that precedes
# the first underscore (the original comment calls this the date part).
data = {}
with open("summary_file.csv", "r") as csvfile:
    for row in csv.DictReader(csvfile):
        prefix = row["image_name"].partition("_")[0]
        data[prefix] = data.get(prefix, 0) + int(row["total_pimples"])

# Save one row per prefix with its summed total.
with open("merged_summary_file.csv", "w", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["image_name", "total_pimples"])
    writer.writeheader()
    writer.writerows(
        {"image_name": prefix, "total_pimples": total}
        for prefix, total in data.items()
    )

In [None]:
# Build a lookup of image_name -> total_pimples from the merged summary.
# Values stay as the strings read from the CSV.
with open("merged_summary_file.csv", "r") as csvfile:
    total_probs_by_date = {
        row["image_name"]: row["total_pimples"] for row in csv.DictReader(csvfile)
    }

# Copy 'data.csv' to 'updated_data.csv' with an extra 'total_pimples' column.
with open("data.csv", "r") as csvfile:
    reader = csv.DictReader(csvfile)
    fieldnames = reader.fieldnames + ["total_pimples"]

    with open("updated_data.csv", "w", newline="") as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            # Rows whose 'date' has no entry in the lookup get 0.
            row["total_pimples"] = total_probs_by_date.get(row["date"], 0)
            writer.writerow(row)

In [None]:
# Reload the joined file as a polars frame and preview the first 25 rows.
data = pl.read_csv(source="updated_data.csv")
data.head(n=25)

In [None]:
# Fill gaps in 'total_pimples' using the 'pimples' column, write the cleaned
# column back into `data`, and save the result.

# Work on just the columns involved in the clean-up.
df1 = data.select(["date", "total_pimples", "pimples"])

# Coerce 'pimples' to float: strict=False turns values that cannot be parsed
# into null instead of raising. interpolate() then fills nulls from the
# neighbouring values and leaves non-null values untouched; any nulls it
# cannot fill fall through to the average fallback below.
df1 = df1.with_columns(
    pl.col("pimples").cast(pl.Float64, strict=False).interpolate()
)

# Where 'total_pimples' is 0, take the value from 'pimples' instead.
df1 = df1.with_columns(
    pl.when(pl.col("total_pimples") == 0)
    .then(pl.col("pimples"))
    .otherwise(pl.col("total_pimples"))
    .alias("total_pimples")
)

# Mean of the non-zero 'total_pimples' values (the filter also drops nulls).
# The aggregation yields a single cell, which is None when nothing is left.
average_df = df1.filter(pl.col("total_pimples") != 0).select(
    pl.col("total_pimples").mean()
)
average = average_df.item()
average = float(average) if average is not None else 0.0

# Any value still missing falls back to that average.
df1 = df1.with_columns(pl.col("total_pimples").fill_null(average))

# Write the cleaned column back into the full frame. The previous version
# re-read data's own untouched 'total_pimples' here, so none of the clean-up
# above ever reached the output file.
# The Int64 cast truncates fractional values (interpolated / averaged rows).
# NOTE(review): shift(-1) moves every value up one row and leaves the last row
# null; it is kept from the original — confirm this row offset is intended.
data = data.with_columns(
    df1.get_column("total_pimples")
    .cast(pl.Int64)
    .shift(-1)
    .alias("total_pimples")
)

# 'pimples' is not part of the final output.
data = data.drop("pimples")

# Write to CSV
data.write_csv("data_with_pimples.csv")