In [34]:
import json
import json5
import demjson

In [None]:
# Load the Australian user-review dump into `user_reviews`.
# demjson is used rather than the stdlib json module, presumably because the
# file is not strict JSON — TODO confirm.
# The encoding is pinned so decoding does not depend on the platform's locale
# default; assumes the file is UTF-8 — TODO confirm.
with open('../../data/australian_user_reviews.json', 'r', encoding='utf-8') as f:
    user_reviews = demjson.decode(f.read())


In [None]:

# Load the Australian users' item libraries into `user_items`.
# demjson is used rather than the stdlib json module, presumably because the
# file is not strict JSON — TODO confirm.
# The encoding is pinned so decoding does not depend on the platform's locale
# default; assumes the file is UTF-8 — TODO confirm.
with open('../../data/australian_users_items.json', 'r', encoding='utf-8') as f:
    user_items = demjson.decode(f.read())


In [None]:
# Load the Steam game catalogue into `steam_games`.
# demjson is used rather than the stdlib json module, presumably because the
# file is not strict JSON — TODO confirm.
# The encoding is pinned so decoding does not depend on the platform's locale
# default; assumes the file is UTF-8 — TODO confirm.
with open('../../data/steam_games.json', 'r', encoding='utf-8') as f:
    steam_games = demjson.decode(f.read())

# with open('../../data/steam_review.json', 'r') as f:
#     steam_reviews = demjson.decode(f.read())

In [None]:
# with open('../../data/review_test.json', 'r') as f:
#     user_reviews = demjson.decode(f.read())

# for i in range(len(user_reviews)):
#     for t in user_reviews[i]['reviews']:
#         for k, v in t.items():
#             print(k, v)
        

# Fields copied from each raw game record into the lookup table.
# 'developer' is included because the feature-building cell reads
# game_details[...].get('developer', 'Unknown'); without it every game would
# resolve to 'Unknown'.
GAME_DETAIL_FIELDS = (
    'app_name',
    'tags',
    'price',
    'discount_price',
    'early_access',
    'specs',
    'url',
    'reviews_url',
    'developer',
)

# game id -> subset of the game's metadata.
# A field that a record does not carry is simply left out instead of raising
# KeyError: every consumer reads this table through .get(field, default), so a
# missing field falls back to that consumer's default.
# Records without an 'id' cannot be keyed and are skipped.
game_details = {
    game['id']: {
        field: game[field]
        for field in GAME_DETAIL_FIELDS
        if field in game
    }
    for game in steam_games
    if 'id' in game
}


# Per-user table keyed by user id: the profile URL plus every review the user
# posted, each enriched with metadata looked up in game_details.
user_data = {}

for reviewer in user_reviews:
    reviewer_id = reviewer['user_id']
    enriched_reviews = []
    user_data[reviewer_id] = {
        'user_url': reviewer['user_url'],
        'reviews': enriched_reviews,
    }

    for entry in reviewer['reviews']:
        reviewed_game_id = entry['item_id']
        # Games absent from game_details yield {} so the defaults below apply.
        metadata = game_details.get(reviewed_game_id, {})
        enriched = {
            'game_id': reviewed_game_id,
            'game_name': metadata.get('app_name', 'Unknown'),
            'review': entry['review'],
            'posted': entry['posted'],
            'recommend': entry['recommend'],
            'price': metadata.get('price', 0),
            'discount_price': metadata.get('discount_price', 0),
            'tags': metadata.get('tags', []),
            'url': metadata.get('url', ''),
        }
        enriched_reviews.append(enriched)

# Attach each known user's owned-games list under 'items'.
# Users that are not already present in user_data are skipped.
for owner in user_items:
    owner_id = owner['user_id']
    if owner_id not in user_data:
        continue

    library = []
    user_data[owner_id]['items'] = library
    for owned in owner['items']:
        owned_game_id = owned['item_id']
        # Games absent from game_details yield {} so the defaults below apply.
        metadata = game_details.get(owned_game_id, {})
        library.append({
            'game_id': owned_game_id,
            'game_name': metadata.get('app_name', 'Unknown'),
            'playtime_forever': owned['playtime_forever'],
            'playtime_2weeks': owned['playtime_2weeks'],
            'price': metadata.get('price', 0),
            'discount_price': metadata.get('discount_price', 0),
            'tags': metadata.get('tags', []),
            'url': metadata.get('url', ''),
        })


KeyboardInterrupt: 

In [None]:
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

def get_game_overall_rating(game_id):
    """Return the fraction of reviews of ``game_id`` that recommend the game.

    ``user_data`` is keyed by user id, so a game's reviews are spread across
    many users' entries and cannot be fetched with ``user_data.get(game_id)``.
    This scans every user's review list and keeps the entries whose
    ``'game_id'`` matches.

    Each call is a full pass over ``user_data``; cache the result when calling
    it repeatedly for the same game.

    Args:
        game_id: Identifier as stored under ``'game_id'`` in each review entry.

    Returns:
        The recommend ratio in ``[0, 1]``, or ``0`` when no review of the game
        is found.
    """
    verdicts = [
        review['recommend']
        for user_info in user_data.values()
        for review in user_info.get('reviews', [])
        if review['game_id'] == game_id
    ]
    if not verdicts:
        return 0
    return sum(1 for verdict in verdicts if verdict) / len(verdicts)


# Build one feature row per (user, reviewed game) pair, laid out as
#   [playtime_forever, <one column per distinct tag>, developer_code,
#    game_recommend_ratio, user_recommend_ratio]
# with label 1 when the review recommends the game and 0 otherwise.
#
# NOTE(review): both ratio features are computed from all reviews, including
# the one being predicted (and the rows that later land in the test split), so
# they leak the target. Consider leave-one-out ratios or ratios computed on the
# training split only.

# Recommend ratio per game, aggregated over every user's reviews in one pass.
# user_data is keyed by user id, so it cannot be looked up by game id.
game_recommend_tally = {}  # game_id -> [recommended_count, review_count]
for user_info in user_data.values():
    for review in user_info.get('reviews', []):
        tally = game_recommend_tally.setdefault(review['game_id'], [0, 0])
        if review['recommend']:
            tally[0] += 1
        tally[1] += 1

# Raw per-sample columns; encoding happens after the loop, over all samples.
playtimes = []
sample_tags = []
sample_developers = []
overall_ratings = []
user_avg_ratings = []
y_combined = []

for user_id, user_info in user_data.items():
    reviews = user_info.get('reviews', [])
    if not reviews:
        continue

    # Share of this user's reviews that recommend; constant across the
    # user's rows, so it is computed once per user.
    user_avg_rating = sum(1 for r in reviews if r['recommend']) / len(reviews)

    # game_id -> playtime for O(1) lookups; setdefault keeps the first entry
    # when a game is listed more than once.
    playtime_by_game = {}
    for item in user_info.get('items', []):
        playtime_by_game.setdefault(item['game_id'], item['playtime_forever'])

    for review in reviews:
        game_id = review['game_id']
        game_info = game_details.get(game_id, {})
        recommended_count, review_count = game_recommend_tally[game_id]

        playtimes.append(playtime_by_game.get(game_id, 0))
        # `or` also covers a field that is present but empty/None.
        sample_tags.append(game_info.get('tags') or [])
        sample_developers.append(game_info.get('developer') or 'Unknown')
        overall_ratings.append(recommended_count / review_count)
        user_avg_ratings.append(user_avg_rating)
        y_combined.append(1 if review['recommend'] else 0)

# Fit each encoder ONCE over all samples so every row shares the same columns.
# Fitting per sample would give an all-ones tag vector whose length is that
# game's tag count (ragged rows) and a developer code that is always 0.
mlb = MultiLabelBinarizer()
tags_encoded = mlb.fit_transform(sample_tags)

le = LabelEncoder()
developer_encoded = le.fit_transform(sample_developers)

X_combined = np.column_stack([
    playtimes,
    tags_encoded,
    developer_encoded,
    overall_ratings,
    user_avg_ratings,
])
y_combined = np.array(y_combined)

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_combined, y_combined, test_size=0.2, random_state=42
)

# The forest is seeded as well: with a seeded split but an unseeded model the
# printed report would still change from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out samples.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))



In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Majority-vote ensemble over three different model families.
# NOTE(review): LogisticRegression and SVC are fit on the raw, unscaled
# features; consider wrapping them in a scaling pipeline — TODO confirm.
model1 = LogisticRegression()
# Seeded so the ensemble's report is reproducible across runs.
model2 = RandomForestClassifier(random_state=42)
model3 = SVC()

# voting='hard' predicts the class chosen by most of the three estimators.
voting_clf = VotingClassifier(
    estimators=[('lr', model1), ('rf', model2), ('svc', model3)],
    voting='hard',
)

voting_clf.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out samples.
y_pred_voting = voting_clf.predict(X_test)
print(classification_report(y_test, y_pred_voting))
