In [None]:
# Import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import re
from wordcloud import WordCloud, STOPWORDS

In [None]:
# Read the frequency-response records from disk and preview the first rows
FR_DATA_PATH = '../../data/headphones-fr-data.json'
frequency_data = pd.read_json(FR_DATA_PATH)

frequency_data.head()

In [None]:
# Clean
# The cell rebinds `frequency_data` without its 'header' column, so a second
# run of the unguarded code would raise KeyError. The guard keeps the cell
# re-runnable: after the first run the already-captured header is reused.
if 'header' in frequency_data.columns:
    # Column names for the response rows, taken from the first record.
    # NOTE(review): assumes every record carries the same header — confirm.
    sample_fr_header = frequency_data['header'][0]
    frequency_data = frequency_data.drop(columns=['header'])
print(sample_fr_header)

# Missing-value count per remaining column
print(frequency_data.isnull().sum())

frequency_data.head()

In [None]:
# Summary statistics
# One entry per headphone; each entry is that headphone's list of response rows.
frequency_response_list = frequency_data['data'].tolist()

# Stack the rows of every headphone into one long table.
flat_list = []
for response_rows in frequency_response_list:
    flat_list.extend(response_rows)
freq_df = pd.DataFrame(data=flat_list, columns=sample_fr_header)

summary_stats = freq_df.describe()
print(summary_stats)

In [None]:
# Plot frequency response for a given headphone index
def plot_frequency_response(headphone_index, freq_range=(20, 9000), amp_range=(50, 115)):
    """Plot the left, right and target response curves of one headphone.

    Parameters
    ----------
    headphone_index : int
        Position of the headphone in ``frequency_response_list``; the same
        position is used to read its name from ``frequency_data['fullname']``.
    freq_range : tuple of (number, number), optional
        Lower and upper x-axis limits in Hz. The default (20, 9000) is the
        band RTINGS.com uses for scoring; pass (20, 20000) for the full
        human hearing range.
    amp_range : tuple of (number, number), optional
        Lower and upper y-axis limits in dB. Defaults to (50, 115).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    response_rows = frequency_response_list[headphone_index]
    response_df = pd.DataFrame(response_rows, columns=sample_fr_header)

    # The response list is indexed by position, so the name is looked up by
    # position as well (label-based .loc only agrees for a default index).
    headphone_name = frequency_data['fullname'].iloc[headphone_index]

    fig, ax = plt.subplots(figsize=(14, 8))
    ax.plot(response_df['Frequency'], response_df['Left'], label='Left Channel')
    ax.plot(response_df['Frequency'], response_df['Right'], label='Right Channel')
    ax.plot(response_df['Frequency'], response_df['Target Response'],
            label='Target Response', linestyle='--')
    ax.legend()

    low_hz, high_hz = freq_range
    ax.set_xscale('log')

    # Only keep ticks inside the requested band: set_xticks() widens the view
    # limits to include every tick it is given, so an out-of-range tick
    # (e.g. 10000 with a 9000 Hz cutoff) would silently undo the x-limit.
    candidate_ticks = [20, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000]
    ax.set_xticks([tick for tick in candidate_ticks if low_hz <= tick <= high_hz])

    # Show plain integers instead of powers of ten on the log axis.
    ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{int(x)}'))

    # Limits are applied after the ticks so they always win.
    ax.set_xlim(low_hz, high_hz)
    ax.set_ylim(*amp_range)

    ax.set_title(f'Frequency Response for {headphone_name}')
    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('Amplitude (dB)')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)
    plt.show()

plot_frequency_response(9)


In [None]:
# Plot average frequency responses with standard deviation
# Aggregate every headphone's rows per frequency; both results come back
# sorted by 'Frequency', so they line up row for row.
responses_by_frequency = freq_df.groupby('Frequency')
mean_responses = responses_by_frequency.mean().reset_index()
std_responses = responses_by_frequency.std().reset_index()

fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(mean_responses['Frequency'], mean_responses['Left'], label='Left Channel Mean')
ax.plot(mean_responses['Frequency'], mean_responses['Right'], label='Right Channel Mean')
ax.plot(mean_responses['Frequency'], mean_responses['Target Response'], label='Target Response', linestyle='--')

# Shaded band of one standard deviation either side of each channel mean.
for channel in ('Left', 'Right'):
    ax.fill_between(
        mean_responses['Frequency'],
        mean_responses[channel] - std_responses[channel],
        mean_responses[channel] + std_responses[channel],
        alpha=0.3,
    )

ax.legend()
ax.set_xscale('log')
# Ticks stay inside the 20-9000 Hz window: set_xticks() expands the view
# limits to include every tick, so a 10000 tick would override the cutoff.
ax.set_xticks([20, 50, 100, 200, 500, 1000, 2000, 5000])
ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{int(x)}'))
# Limits are applied after the ticks so they always win.
ax.set_xlim(20, 9000)
ax.set_ylim(50, 115)
ax.set_title('Average Frequency Response with Standard Deviation')
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Amplitude (dB)')
plt.show()


In [None]:
# Plot frequency responses for a sample of headphones
selected_headphones = [3, 9]
fig, ax = plt.subplots(figsize=(14, 8))

for i in selected_headphones:
    headphone_response = frequency_response_list[i]
    headphone_df = pd.DataFrame(headphone_response, columns=sample_fr_header)
    # The response list is positional, so the name is read by position too.
    headphone_name = frequency_data['fullname'].iloc[i]
    ax.plot(headphone_df['Frequency'], headphone_df['Left'], label=f'{headphone_name} - Left Channel')
    ax.plot(headphone_df['Frequency'], headphone_df['Right'], label=f'{headphone_name} - Right Channel')

# The target curve is taken from the first headphone's rows.
# NOTE(review): assumes all headphones share the same target response — confirm.
single_headphone_response = frequency_response_list[0]
single_headphone_df = pd.DataFrame(single_headphone_response, columns=sample_fr_header)
ax.plot(single_headphone_df['Frequency'], single_headphone_df['Target Response'], label='Target Response', linestyle='--')

ax.legend()
ax.set_xscale('log')
# Ticks stay inside the 20-9000 Hz window: set_xticks() expands the view
# limits to include every tick, so a 10000 tick would override the cutoff.
ax.set_xticks([20, 50, 100, 200, 500, 1000, 2000, 5000])
ax.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{int(x)}'))
# Limits are applied after the ticks so they always win.
ax.set_xlim(20, 9000)
ax.set_ylim(50, 115)
ax.set_title('Frequency Responses for a sample of Headphones')
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Amplitude (dB)')
plt.show()


In [None]:
# Read the headphone score records from disk and preview the first rows
SCORES_DATA_PATH = '../../data/headphones-data.json'
scores_data = pd.read_json(SCORES_DATA_PATH)

scores_data.head()

In [None]:
# Count the missing entries in each column of the scores table
missing_per_column = scores_data.isna().sum()
print(missing_per_column)

scores_data.head()

In [None]:
# Clean
# Treat empty or whitespace-only strings as missing, then drop every row that
# still has a missing value. Written as one chained expression rather than an
# in-place replace, so the frame loaded earlier is not mutated behind the
# reader's back and the cell gives the same result when re-run.
# NOTE(review): dropna() removes a row if ANY column is missing — confirm that
# is intended rather than restricting to the columns used below.
scores_data = scores_data.replace(r'^\s*$', np.nan, regex=True).dropna()
print(scores_data.isnull().sum())
scores_data.head()

In [None]:
# Descriptive statistics for the four score columns
score_columns = [
    'neutralSoundScore',
    'bassAccuracyScore',
    'midAccuracyScore',
    'trebleAccuracyScore',
]
score_table = scores_data[score_columns]
basic_stats = score_table.describe()
print(basic_stats)


In [None]:
# One histogram (with KDE overlay) per score column, laid out on a 2x2 grid
plt.figure(figsize=(12, 8))
for panel_number, column in enumerate(score_columns, start=1):
    panel = plt.subplot(2, 2, panel_number)
    sns.histplot(scores_data[column], kde=True, bins=20, ax=panel)
    panel.set_title(f'Distribution of {column}')
plt.tight_layout()
plt.show()

In [None]:
# Word cloud built from the bass / mid / treble accuracy descriptions
bass_text = scores_data['bassAccuracyDescription'].fillna('')
mid_text = scores_data['midAccuracyDescription'].fillna('')
treble_text = scores_data['trebleAccuracyDescription'].fillna('')

# One combined string per headphone, then everything joined into one text.
per_headphone_text = bass_text + ' ' + mid_text + ' ' + treble_text
descriptions = ' '.join(per_headphone_text)

# Lower-case word tokens.
words = re.findall(r'\b\w+\b', descriptions.lower())

# Keep words longer than two characters that are not stop words.
stopwords = set(STOPWORDS)
filtered_words = [word for word in words if word not in stopwords and len(word) > 2]

cloud_text = ' '.join(filtered_words)
wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
    stopwords=stopwords,
).generate(cloud_text)

fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
ax.set_title('Word Cloud of Descriptions')
plt.show()