In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sqlite3
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# ProtonDB Analysis: Hardware, Software, and Sentiment Insights

This notebook walks through key findings from ProtonDB data — from Linux distro usage to GPU trends, user sentiment, and tinkering insights.

## 1. Linux Distro Usage
This bar chart shows the number of ProtonDB reports per Linux distro.

In [3]:
def categorize_os(os):
    os = os.lower()
    if 'arch' in os:
        return 'Arch Linux'
    elif 'ubuntu' in os:
        return 'Ubuntu'
    elif 'debian' in os:
        return 'Debian'
    elif 'fedora' in os:
        return 'Fedora'
    elif 'manjaro' in os:
        return 'Manjaro'
    elif 'opensuse' in os:
        return 'openSUSE'
    elif 'pop!_os' in os:
        return 'Pop!_OS'
    elif 'mint' in os:
        return 'Linux Mint'
    elif 'steamos' in os or 'holo' in os:
        return 'SteamOS'
    elif 'endeavouros' in os:
        return 'EndeavourOS'
    else:
        return 'Other'

conn = sqlite3.connect('/mnt/data/proton_reports.sqlite')
os_df = pd.read_sql_query("SELECT `systemInfo.os` AS os FROM reports WHERE os IS NOT NULL", conn)
os_df['os_family'] = os_df['os'].apply(categorize_os)
os_counts = os_df['os_family'].value_counts()
sns.barplot(x=os_counts.index, y=os_counts.values, palette='muted')
plt.title('Distribution of Linux Distros Among ProtonDB Reports')
plt.xlabel('Linux Distro')
plt.ylabel('Number of Reports')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

OperationalError: unable to open database file

## 2. Sentiment Over Time
Sentiment analysis of user notes reveals Proton’s growing reliability.

In [4]:
sentiment_df = pd.read_sql_query("""
SELECT `timestamp` AS unix_time, `responses.concludingNotes` AS notes FROM reports
WHERE `responses.concludingNotes` IS NOT NULL
""", conn)
sentiment_df['notes'] = sentiment_df['notes'].astype(str).str.lower()
sentiment_df['date'] = pd.to_datetime(sentiment_df['unix_time'], unit='s')
sentiment_df['year_month'] = sentiment_df['date'].dt.to_period('M')
sentiment_df['sentiment'] = sentiment_df['notes'].apply(lambda x: TextBlob(x).sentiment.polarity)
monthly_sentiment = sentiment_df.groupby('year_month')['sentiment'].mean().reset_index()
monthly_sentiment['year_month'] = monthly_sentiment['year_month'].astype(str)
sns.lineplot(data=monthly_sentiment, x='year_month', y='sentiment')
plt.xticks(rotation=45, ha='right')
plt.title('Average Sentiment of ProtonDB Reports Over Time')
plt.xlabel('Month')
plt.ylabel('Average Sentiment Score')
plt.tight_layout()
plt.show()

NameError: name 'conn' is not defined