In [11]:
import pandas as pd
import json
import csv

from config import PATH_TO_CSV

In [12]:
df = pd.read_csv(PATH_TO_CSV)

In [13]:
drop_colmns = [
    "Timestamp",
    "Email Address",
    "Your name is",
    "When do you wake up?",
    "What did you eat in the morning? (if you want to add photo)",
    "What did you use in the morning? (if you want to add photo)",
    "You can add a photo of your face (not required)",
    "What did you eat for lunch? (if you want to add photo)",
    "What did you eat in the evening? (if you want to add a photo)",
    "What do you use in your evening skincare routine? (if you want to add a photo)",
    "When did you go to bed?",
]

df.drop(columns=drop_colmns, inplace=True)

In [15]:
column_rename_map = {
    # 'Timestamp': 'Timestamp',
    # 'Email Address': 'Email',
    # 'Your name is': 'Name',
    "Today is": "Today",
    # 'When do you wake up?': 'Wake Up Time',
    "What did you use in the morning? ": "Morning Routine",
    # 'What did you use in the morning? (if you want to add photo)': 'Morning Routine Photo',
    "Did you use sunscreen?": "Sunscreen",
    "How many pimples were on your face this morning?": "Pimples",
    # 'You can add a photo of your face (not required)': 'Face Photo',
    "Have you been drinking enough water? (+1.5 L.)": "Water Intake",
    "Have you been drinking tea? (in OTHER how many cups, not required)": "Tea Intake",
    "Have you been drinking coffee? (in OTHER how many cups, not required)": "Coffee Intake",
    "Have you been drinking milk? (in OTHER type of milk, not required)": "Milk Intake",
    "Have you been drinking other types of drinks? (in OTHER type of drink, not required)": "Other Drinks Intake",
    "Breakfast \nHave you been eating some food that includes these ingredients? (in OTHER type of food, not required)": "Breakfast Food",
    # 'What did you eat in the morning? (if you want to add photo)': 'Breakfast Photo',
    "Lunch\nHave you been eating some food that includes these ingredients? (in OTHER type of food, not required)": "Lunch Food",
    # 'What did you eat for lunch? (if you want to add photo)': 'Lunch Photo',
    "Dinner\nHave you been eating some food that includes these ingredients? (in OTHER type of food, not required)": "Dinner Food",
    # 'What did you eat in the evening? (if you want to add a photo)': 'Dinner Photo',
    "Did you have any snacks?(in OTHER type of food, not required)": "Snacks",
    "Did you take any supplements due day? ": "Supplements",
    "Did you have a stress? ": "Stress",
    "What was your physical activity?  (in other type or how many steps or both) ": "Physical Activity",
    "What do you use in your evening skincare routine?": "Evening Skincare",
    # 'What do you use in your evening skincare routine? (if you want to add a photo)': 'Evening Skincare Photo',
    "Did you use a mask?": "Mask",
    # 'When did you go to bed?': 'Bedtime'
}

In [16]:
df.rename(columns=column_rename_map, inplace=True)
df["Today"] = pd.to_datetime(df["Today"])
df["Today"] = df["Today"].dt.strftime("%Y%m%d")

df.to_csv("data.csv", index=False)

In [6]:
# Load the JSON data from the file
with open("acne_detection_results.json") as f:
    data = json.load(f)

# Initialize counts dictionary to store counts for each image
counts = {}

# Iterate through each key-value pair in the JSON data
for key, value in data.items():
    # Initialize count for the current image
    class_probs_count = 0
    # Iterate through each dictionary in the list
    for item in value:
        # Increment the count if "class_probs" exists
        if "class_probs" in item:
            class_probs_count += len(item["class_probs"])
    # Store the count for the current image
    counts[key] = class_probs_count

# Print the counts for each image
for image_name, count in counts.items():
    print(f"Image: {image_name}, Total count of pimples: {count}")

# Write the summary to a CSV file
with open("summary_file.csv", "w", newline="") as csvfile:
    fieldnames = ["image_name", "total_pimples"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    for image_name, count in counts.items():
        writer.writerow({"image_name": image_name, "total_pimples": count})

Image: 20230710_face1.jpg, Total count of pimples: 1
Image: 20230709_face2.jpg, Total count of pimples: 2
Image: 20230707_face3.jpg, Total count of pimples: 2


In [7]:
# Read the CSV file
data = {}
with open("summary_file.csv", "r") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        image_name = row["image_name"].split("_")[0]  # Extract date part
        total_pimples = int(row["total_pimples"])
        if image_name in data:
            data[image_name] += total_pimples
        else:
            data[image_name] = total_pimples

# Write the merged summary to a new CSV file
with open("merged_summary_file.csv", "w", newline="") as csvfile:
    fieldnames = ["image_name", "total_pimples"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for image_name, total_pimples in data.items():
        writer.writerow(
            {"image_name": image_name, "total_pimples": total_pimples}
        )

In [8]:
# Read total class probabilities from 'merged_summary_file.csv' and store in a dictionary
total_probs_by_date = {}
with open("merged_summary_file.csv", "r") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        total_probs_by_date[row["image_name"]] = row["total_pimples"]

# Read 'data.csv', add 'total_pimples' column, and write to a new file
with open("data.csv", "r") as csvfile:
    reader = csv.DictReader(csvfile)
    fieldnames = reader.fieldnames + ["total_pimples"]

    with open("updated_data.csv", "w", newline="") as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            date = row["Today"]
            total_probs = total_probs_by_date.get(date, 0)
            row["total_pimples"] = total_probs
            writer.writerow(row)

In [9]:
data = pd.read_csv("updated_data.csv")
data.head(25)

Unnamed: 0,Today,Morning Routine,Sunscreen,Pimples,Water Intake,Tea Intake,Coffee Intake,Milk Intake,Other Drinks Intake,Breakfast Food,Lunch Food,Dinner Food,Snacks,Supplements,Stress,Physical Activity,Evening Skincare,Mask,total_pimples
0,20230705,cleaner,no,Many,yes,no,"yes, 1","yes, cow",alcohol,"gluten, white sugar, sweetener","gluten, lactose, white sugar","greens, nightshade (tomatoes, potatoes, eggpla...",yes,no,no,"steps, 9100","cleaner, cream",no,0
1,20230706,"cleaner, tonic, cream",no,A lot,yes,no,yes,"yes, cow",sparkling mineral water,"lactose, white sugar",Ramen,"greens, red meet, white meet",no,no,no,"workout, steps","cleaner, cream",no,0
2,20230707,"cleaner, cream",no,A lot,no,no,yes,"yes, cow",soda water (cola/pepsi/fanta/schweppes/other),"gluten, lactose, sweetener, fruits, sweets","sweetener, sweets","greens, red meet, sweets",no,no,low,steps,"cleaner, cream",no,2
3,20230708,"cleaner, tonic, cream, serum",no,A lot,no,no,yes,"yes, cow",alcohol,"gluten, other type of sugar, white sugar","greens, red meet","red meet, mushrooms",yes,no,no,"workout, steps","cleaner, cream",no,0
4,20230709,"cleaner, tonic, cream",no,A lot,yes,no,yes,"yes, cow","alcohol, soda water (cola/pepsi/fanta/schweppe...",gluten,"red meet, gluten","greens, white meet, gluten, sweetener, sweets",yes,no,low,steps,"cleaner, cream",no,2
5,20230710,"cleaner, tonic, cream",no,A lot,yes,yes,yes,"yes, cow",no,"lactose, other type of sugar","greens, red meet","red meet, starch",yes,yes,low,steps,"cleaner, tonic, Cleansing oil",no,1
6,20230711,"cleaner, cream",no,Many,yes,yes,no,no,Energy drink,"lactose, other type of sugar",mushrooms,"red meet, starch, nightshade (tomatoes, potato...",yes,yes,no,steps,"cleaner, tonic, cream, serum, Cleansing oil, p...",no,0
7,20230712,"cleaner, tonic, cream",no,A lot,yes,yes,yes,"yes, cow",no,"red meet, gluten",red meet,"fish, nightshade (tomatoes, potatoes, eggplant...",yes,yes,no,steps,"cleaner, cream, Cleansing oil",no,0
8,20230713,"cleaner, tonic, cream, serum",no,A lot,no,no,yes,"yes, cow",alcohol,"gluten, nightshade (tomatoes, potatoes, eggpla...","greens, red meet, white meet, gluten, lactose,...","red meet, white meet, gluten, nightshade (toma...",no,yes,no,steps,"cleaner, Cleansing oil, zink",no,0
9,20230714,"cleaner, tonic, cream",no,A lot,yes,yes,yes,"yes, cow",no,lactose,"white meet, nightshade (tomatoes, potatoes, eg...","greens, white meet, nightshade (tomatoes, pota...",no,yes,no,steps,Make up cleansing wipes,no,0


In [10]:
df1 = data[["Today", "total_pimples", "Pimples"]]

# Convert 'Pimples' column to numeric (ignore errors for non-numeric values)
df1["Pimples"] = pd.to_numeric(df1["Pimples"], errors="coerce")

# Iterate over the 'Pimples' column
for i in range(1, len(df1) - 1):
    if isinstance(df1.at[i, "Pimples"], str):  # If the value is text
        # Calculate the average of neighboring rows
        avg_pimples = (df1.at[i - 1, "Pimples"] + df1.at[i + 1, "Pimples"]) / 2
        df1.at[i, "Pimples"] = avg_pimples

# Replace '0' values in 'total_pimples' column with values from 'Pimples' column
df1.loc[df1["total_pimples"] == 0, "total_pimples"] = df1["Pimples"]

# Calculate the average of non-zero values in the column
average = df1.loc[df1["total_pimples"] != 0, "total_pimples"].mean()

# Replace '0' values with the calculated average
df1["total_pimples"].fillna(average, inplace=True)

data["total_pimples"] = df1["total_pimples"].astype(int).shift(-1)
data = data.drop(columns=["Pimples"])
data.to_csv('data_with_pimples.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1["Pimples"] = pd.to_numeric(df1["Pimples"], errors="coerce")
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1["total_pimples"].fillna(average, inplace=True)
