## Imports

In [72]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as pit 
from matplotlib.cm import get_cmap
from matplotlib.colors import Normalize

### Import opinions from .json file

In [73]:
# List the product codes that have a saved opinions file, one per line.
# Only "*.json" entries are considered and the extension is removed with
# os.path.splitext, so a code containing a dot is not truncated and stray
# files (e.g. ".DS_Store") are not printed as empty codes.
# os.listdir returns entries in arbitrary order, hence the explicit sort.
available_products = sorted(
    os.path.splitext(filename)[0]
    for filename in os.listdir("./opinions")
    if filename.endswith(".json")
)
print(*available_products, sep="\n")

148801196
39562616
84514582


In [74]:
# Product code typed by the user; later cells use it to build file names.
# Surrounding whitespace is stripped so an accidental space before/after the
# code does not end up inside those file names.
productID = input("Enter product code, please:").strip()

In [75]:
# Read the JSON file of the selected product into a DataFrame and display it.
opinions_path = f"./opinions/{productID}.json"
opinions = pd.read_json(opinions_path)
opinions

Unnamed: 0,opinion_id,author,recommendation,stars,content_pl,pros_pl,cons_pl,vote_yes,vote_no,published,purchased,content_en,pros_en,cons_en
0,18779019,m...i,True,5.0,Nigdy nie chciałem słuchawek nausznych - szcze...,"[jakość dźwięku, wygląd, wygoda używania]",[],3,0,2024-07-02 16:52:53,2024-06-01 17:53:44,"I never wanted earphones - honestly, if they w...","[sound quality, appearance, convenience of use]",[]
1,17859737,k...o,True,5.0,"Piękne słuchawki, wygodne dobrze przesyła dźwi...","[jakość dźwięku, wygląd, wygoda używania]",[],4,0,2023-09-01 08:30:01,2023-08-31 11:30:38,"Beautiful headphones, comfortable, sends the s...","[sound quality, appearance, convenience of use]",[]
2,19249603,Roman,True,4.0,Ogólnie słuchawki bardzo fajne i mega długo tr...,[],[],1,0,2024-12-01 23:32:37,2024-07-23 10:45:35,"In general, the headphones are very cool and t...",[],[]
3,17543798,m...u,True,5.0,"Super słuchawki. Bateria trzyma tyle godzin, ż...","[jakość dźwięku, wygląd, wygoda używania]",[],1,0,2023-05-24 19:38:46,2023-04-23 17:00:15,Super headphones. The battery holds so many ho...,"[sound quality, appearance, convenience of use]",[]
4,17430069,r...2,True,5.0,Słuchawki dobrej jakości nawet do ps5 na strze...,[],[],1,0,2023-04-21 11:45:02,2023-04-16 19:17:47,Good quality headphones even for PS5 for shoot...,[],[]
5,17980646,c...5,True,5.0,"Bardzo dobre audio, szybko sie łączy z telefonem","[jakość dźwięku, wygląd, wygoda używania]",[],2,0,2023-10-09 09:25:12,2023-10-06 17:00:40,"Very good audio, it quickly connects to the phone","[sound quality, appearance, convenience of use]",[]
6,18737477,s...7,True,5.0,Fajne słuchawki dobra jakość dźwięku i są bard...,"[jakość dźwięku, wygląd, wygoda używania]",[],0,0,2024-06-16 05:59:51,2024-06-07 21:53:43,Cool headphones good sound quality and are ver...,"[sound quality, appearance, convenience of use]",[]
7,17346514,p...i,True,4.5,Słuchawki solidnie wykonane.,"[jakość dźwięku, wygląd, wygoda używania]",[],1,0,2023-03-31 13:32:56,2023-03-21 17:56:29,Headphones are solidly made.,"[sound quality, appearance, convenience of use]",[]
8,18218236,t...a,True,4.5,Bardzo dobra jakosc dzwieku,[],[],1,0,2023-12-17 17:49:47,2023-12-13 20:48:09,Very good sound quality,[],[]
9,18216730,t...l,True,5.0,potrzebuje czasu na ocenę,[],[],1,0,2023-12-16 07:24:25,2023-12-10 16:20:59,I need time to evaluate,[],[]


### Basic Statistics

In [76]:
# Summary statistics for the loaded opinions.
# An opinion counts as having advantages/disadvantages when its pros_pl/cons_pl
# value is truthy (astype(bool) maps an empty list to False).
has_pros = opinions.pros_pl.astype(bool)
has_cons = opinions.cons_pl.astype(bool)

opinions_count = opinions.shape[0]
pros_count = has_pros.sum()
cons_count = has_cons.sum()
# Element-wise AND of the two masks: opinions listing both pros and cons.
pros_cons_count = (has_pros & has_cons).sum()
average_rate = opinions.stars.mean()

print(f"The number of opinions : {opinions_count}")
print(f"The number of opinions with advantages: {pros_count}")
print(f"The number of opinions with disadvantages: {cons_count}")
# Report pros_cons_count here (the original printed cons_count by mistake).
print(f"The number of opinions with both advantages and disadvantages listed: {pros_cons_count}")
print(f"The average rate of the product: {average_rate:.2f}")


The number of opinions : 10
The number of opinions with advantages: 6
The number of opinions with disadvantages: 0
The number of opinions with both: advantages and disadvantages have been listened : 0
The average rate of the product: 4.80


In [77]:
# Flatten the per-opinion lists of English pros/cons into one value per row,
# then count how many times each distinct feature occurs.
pros = opinions["pros_en"].explode().value_counts()
cons = opinions["cons_en"].explode().value_counts()
print(pros, cons, sep="\n")

pros_en
sound quality         6
appearance            6
convenience of use    6
Name: count, dtype: int64
Series([], Name: count, dtype: int64)


### Charts

In [78]:
# Make sure the output directories for the charts exist.
# exist_ok=True makes the call a no-op when the directory is already there,
# replacing the separate (and racy) "check, then create" steps.
for chart_dir in ("./pie_charts", "./bar_charts"):
    os.makedirs(chart_dir, exist_ok=True)

In [79]:
# Count the recommendation values, keeping missing values as their own
# category, then force a fixed False / True / NaN order; categories that do
# not occur in the data are filled in with a count of 0.
recommendation_counts = opinions["recommendation"].value_counts(dropna=False)
recommendations = recommendation_counts.reindex([False, True, np.nan], fill_value=0)
recommendations

recommendation
False     0
True     10
NaN       0
Name: count, dtype: int64

In [80]:
# Pie chart of the recommendation shares, saved to ./pie_charts/<productID>.png.
# `labels` and `colors` are applied to the slices by position, so they follow
# the False / True / NaN order of `recommendations`. `labels` has to be an
# ordered sequence: a set literal has no guaranteed iteration order and could
# attach a label to the wrong slice.
recommendations.plot.pie(
    label = "",
    labels = ["Not recommend","Recommend","No opinion"],
    colors = ["#ca8be6","#efb0e9","#8fe7ed"],
    # Hide the percentage text on empty slices.
    autopct = lambda v: f"{v:.1f}%" if v > 0 else "",
    title = f"Share of recommendations for product {productID}",
)
pit.savefig(f"./pie_charts/{productID}.png")
# Close the figure after saving.
pit.close()

In [81]:
# Number of opinions per star rating, laid out on the full 0.0-5.0 scale in
# half-star steps; ratings that never occur get a count of 0.
star_scale = np.arange(0, 5.5, 0.5).tolist()
stars = opinions["stars"].value_counts().reindex(star_scale, fill_value=0)
stars

stars
0.0    0
0.5    0
1.0    0
1.5    0
2.0    0
2.5    0
3.0    0
3.5    0
4.0    1
4.5    2
5.0    7
Name: count, dtype: int64

In [82]:
# Bar chart of the rating distribution, saved to ./bar_charts/<productID>.png.
# Bar colour by rating: below 3 -> crimson, above 3.5 -> forestgreen,
# everything in between (3 and 3.5) -> silver.
bar_colors = [
    "crimson" if rating < 3 else "forestgreen" if rating > 3.5 else "silver"
    for rating in stars.index
]
ax = stars.plot.bar(
    xlabel = "Rate (number of stars in range 0 to 5)",
    ylabel = "Count of rates (number of opinions)",
    title = f"Number of opinions about the product {productID} with certain number of stars",
    color = bar_colors,
)
# Keep the rating tick labels horizontal.
pit.xticks(rotation = 0)
# Write the count on each bar.
for container in ax.containers:
    ax.bar_label(container)
pit.savefig(f"./bar_charts/{productID}.png")
# Close the figure after saving.
pit.close()