In [3]:
import json

In [7]:
# Load the data 
def load_data(filename):
    """Read a JSON document from disk and return the parsed Python object.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's top-level value
        (a list, dict, string, number, bool, or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # default text encoding, which is not UTF-8 on every system.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

In [9]:
# Read the raw records from store_data.json, then echo them along with the
# type of the container that was loaded.
data = load_data("store_data.json")
print(data)
print(type(data))

[{'name': 'Alice', 'rating': '5 ', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': 'four', 'feedback': 'ok but late Delivery', 'age': '30'}, {'name': ' Charlie', 'rating': 'two', 'feedback': 'BAD EXPERIENCE '}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}, {'name': 'Alice', 'rating': '5', 'feedback': 'Great product again!', 'age': '25'}]
<class 'list'>


# Clean & Structure the data

In [50]:
def clean_data(data):
    """Normalize raw feedback records and drop repeat submissions.

    Every kept record is a shallow copy, so the dictionaries passed in by the
    caller are never modified. For each record:

    * ``"name"`` is stripped of surrounding whitespace; the stripped value is
      both stored in the result and used as the duplicate key.
    * ``"rating"`` is converted to a number where possible (see
      ``_normalize_rating``).
    * ``"age"`` is set to ``None`` when the key is absent.

    Only the first record for a given stripped name is kept.

    Args:
        data: Iterable of dicts, each with at least ``"name"`` and
            ``"rating"`` keys.

    Returns:
        A new list of cleaned record dicts, in first-seen order.

    Raises:
        KeyError: If a record lacks ``"name"`` or ``"rating"``.
        AttributeError: If a name is not a string, or a rating is neither a
            number nor a string.
    """
    text_to_num = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}
    cleaned_data = []
    seen_names = set()

    for user in data:
        name = user["name"].strip()
        rating = _normalize_rating(user["rating"], text_to_num)

        # First submission per person wins; later ones are dropped.
        if name in seen_names:
            continue
        seen_names.add(name)

        # Copy so the caller's record is left exactly as it was loaded.
        record = dict(user)
        record["name"] = name
        record["rating"] = rating
        # Make the optional field explicit so every record has an "age" key.
        record.setdefault("age", None)
        cleaned_data.append(record)

    return cleaned_data


def _normalize_rating(raw_rating, text_to_num):
    """Convert one raw rating to a number, falling back to the cleaned text."""
    # Already numeric: nothing to strip or parse.
    if isinstance(raw_rating, (int, float)):
        return raw_rating

    text = raw_rating.strip().lower()
    if text in text_to_num:
        return text_to_num[text]

    # Try int before float so "5" becomes 5 and "3.5" becomes 3.5.
    for parse in (int, float):
        try:
            return parse(text)
        except ValueError:
            continue

    # Unrecognized text is returned stripped and lower-cased, unparsed.
    return text


In [54]:
# Rebind `data` to the cleaned records.
# NOTE(review): because the input and output share one name, re-running this
# cell passes already-cleaned records back into clean_data.
data = clean_data(data)

# Get meaningful insights from the data

In [62]:
def get_insights(data):
    """Print the average rating and the percentage of ratings below 3.

    Args:
        data: Iterable of dicts whose ``"rating"`` value can be passed to
            ``float()``.

    Returns:
        None. Results are written to standard output.

    Raises:
        KeyError: If a record has no ``"rating"`` key.
        ValueError, TypeError: If a rating cannot be converted by ``float()``.
    """
    # Convert once up front; both statistics reuse the same numbers.
    ratings = [float(user["rating"]) for user in data]

    if not ratings:
        # With no records there is nothing to divide by.
        print("no ratings to summarise")
        return

    # Avg rating
    tot_rating = sum(ratings)
    print(f"avg rating = {tot_rating/len(ratings)}")

    # Percentage of users with a poor rating (strictly below 3)
    poor_rating = sum(1 for rating in ratings if rating < 3)
    print(f"% of user with poor rating = {poor_rating/len(ratings) * 100}%")
            

In [63]:
# Print the rating summary for the records currently bound to `data`.
get_insights(data)

avg rating = 3.9
% of user with poor rating = 20.0%


# Build recommendation features

In [69]:
def get_recommendations(data, threshold=4, high_brand="Apple", low_brand="Samsung"):
    """Assign each user a brand based on their rating.

    Args:
        data: Iterable of dicts with ``"name"`` and ``"rating"`` keys; the
            rating must be convertible by ``float()``.
        threshold: Ratings greater than or equal to this value get
            ``high_brand``; all others get ``low_brand``.
        high_brand: Brand recommended at or above the threshold.
        low_brand: Brand recommended below the threshold.

    Returns:
        A list of ``{"name": ..., "brand": ...}`` dicts, one per input record
        and in the same order. Names are copied through unchanged.

    Raises:
        KeyError: If a record lacks ``"name"`` or ``"rating"``.
        ValueError, TypeError: If a rating cannot be converted by ``float()``.
    """
    return [
        {
            "name": user["name"],
            "brand": high_brand if float(user["rating"]) >= threshold else low_brand,
        }
        for user in data
    ]

In [70]:
# Show the brand recommendation computed for each record in `data`.
get_recommendations(data)

[{'name': 'Alice', 'brand': 'Apple'},
 {'name': 'Bob', 'brand': 'Apple'},
 {'name': ' Charlie', 'brand': 'Samsung'},
 {'name': 'Diana', 'brand': 'Apple'},
 {'name': 'Eve', 'brand': 'Samsung'}]