# Neuroscience Data Analysis with Python – MiniProject #1

## Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

---
## Q1: Analyzing Neural Spike Patterns

### Setting up the spike data

In [None]:
# Neuron A: Regular neuron, no noise – fires exactly every 10 ms (10, 20, ..., 5000)
neuron_a_spikes = np.arange(10, 5001, 10)

# Neuron B: Regular neuron with Gaussian noise
# Mean interval = 10 ms, small random fluctuations (sd = 1.5 ms) around it.
# 600 intervals (~6 s worth) are drawn instead of exactly 500: 500 intervals sum to
# roughly 5000 ms only on average, so with an unlucky seed the train would stop
# short of the 5-second window. The surplus is trimmed by the filter below.
np.random.seed(42)
noise_b = np.random.normal(loc=10, scale=1.5, size=600)
neuron_b_spikes = np.cumsum(noise_b)  # running sum of intervals = spike times
neuron_b_spikes = neuron_b_spikes[neuron_b_spikes <= 5000]  # keep within 5 seconds

# Neuron C: Bursting neuron
# 5 rapid spikes (2 ms apart) every 100 ms
burst_starts = np.arange(1, 5001, 100)
within_burst_offsets = 2 * np.arange(5)  # 0, 2, 4, 6, 8 ms after the burst onset
# Broadcasting gives one row per burst; ravel() lays the bursts out one after another
neuron_c_spikes = (burst_starts[:, None] + within_burst_offsets).ravel()
neuron_c_spikes = neuron_c_spikes[neuron_c_spikes <= 5000]  # keep within 5 seconds

print(f"Neuron A: {len(neuron_a_spikes)} spikes")
print(f"Neuron B: {len(neuron_b_spikes)} spikes")
print(f"Neuron C: {len(neuron_c_spikes)} spikes")

### Part A: Raster Plot

In [None]:
# One entry per raster row, listed bottom-to-top: (tick label, spike times, colour)
raster_rows = [
    ('Neuron A\n(Regular)', neuron_a_spikes, '#e74c3c'),
    ('Neuron B\n(Noisy)', neuron_b_spikes, '#3498db'),
    ('Neuron C\n(Bursting)', neuron_c_spikes, '#2ecc71'),
]
row_labels = [row[0] for row in raster_rows]
row_spikes = [row[1] for row in raster_rows]
row_colors = [row[2] for row in raster_rows]
row_positions = [1, 2, 3]

fig, ax = plt.subplots(figsize=(12, 4))

# eventplot draws every spike as a short vertical tick on its neuron's row
ax.eventplot(row_spikes,
             colors=row_colors,
             lineoffsets=row_positions,
             linelengths=0.7,
             linewidths=1.2)

# Replace the numeric row positions with the neuron names
ax.set_yticks(row_positions)
ax.set_yticklabels(row_labels)
ax.set(xlabel='Time (ms)',
       title='Raster Plot – Spike Trains of Three Neurons',
       xlim=(0, 5000))
ax.grid(True, axis='x', alpha=0.3)

plt.tight_layout()
plt.show()

**What did I see?**

Neuron A fires perfectly evenly spaced spikes (one every 10 ms) across the entire 5 seconds – exactly as expected for a regular neuron with no noise.  
Neuron B looks very similar to A, but the spacing between spikes is slightly irregular due to the Gaussian noise added to each interval.  
Neuron C clearly shows clusters (bursts) of tightly packed spikes separated by quiet gaps – this is the classic bursting pattern.  
The results matched my expectations based on how each neuron type was defined.

### Part B: Inter-Spike Interval (ISI) Analysis

In [None]:
# ISI = gap between each spike and the one before it (first difference of the spike times)
isi_a = np.diff(neuron_a_spikes)
isi_b = np.diff(neuron_b_spikes)
isi_c = np.diff(neuron_c_spikes)

# Panel specification: (ISIs, bin edges, colour, title, x-limits or None for autoscale)
half_ms_bins = np.arange(0, 25, 0.5)
isi_panels = [
    # A – all intervals are exactly 10 ms, so we expect a single sharp bar
    (isi_a, half_ms_bins, '#e74c3c', 'Neuron A – Regular (No Noise)', (0, 25)),
    # B – noisy intervals, should spread around 10 ms
    (isi_b, half_ms_bins, '#3498db', 'Neuron B – Regular with Noise', (0, 25)),
    # C – two groups: short (within burst) and long (between bursts); wider range, 1 ms bins
    (isi_c, np.arange(0, 120, 1), '#2ecc71', 'Neuron C – Bursting', None),
]

# Draw the three histograms side by side
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
for panel_ax, (intervals, bin_edges, bar_color, panel_title, x_limits) in zip(axes, isi_panels):
    panel_ax.hist(intervals, bins=bin_edges, color=bar_color, edgecolor='black', alpha=0.8)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Inter-spike interval (ms)')
    panel_ax.set_ylabel('Count')
    if x_limits is not None:
        panel_ax.set_xlim(*x_limits)

plt.suptitle('ISI Distributions – All Three Neurons', fontsize=13, y=1.02)
plt.tight_layout()
plt.show()

**Answers:**

- **Perfectly regular neuron (A):** The ISI histogram is a single sharp bar at exactly 10 ms. Every interval is identical, so there is zero spread.
- **Effect of noise (B):** The histogram turns into a bell-shaped curve centered around 10 ms. The noise adds random variability so intervals scatter around the mean instead of all being exactly 10.
- **Bursting neuron (C):** The histogram is bimodal (two peaks). One peak is at a very short interval (2 ms), representing consecutive spikes within the same burst, and the other is at a much longer interval (92 ms), representing the gap between the last spike of one burst and the first spike of the next (bursts start every 100 ms and each burst spans 8 ms). This two-peak shape is the signature of bursting behavior.

### Part C: Generate and Classify My Own Neuron

In [None]:
# Synthetic "regular + Gaussian noise" neuron.
# Interval sd is 2 ms, the upper end of the allowed 1–2 ms range.
np.random.seed(123)
# Taking the absolute value folds any negative draw back to a positive interval
my_intervals = np.abs(np.random.normal(loc=10, scale=2, size=600))

# Spike times are the running sum of the intervals, cut off at the 5-second mark
my_spikes = my_intervals.cumsum()
my_spikes = my_spikes[my_spikes <= 5000]

# ISIs of the retained spikes: each spike time minus the previous one
my_isi = my_spikes[1:] - my_spikes[:-1]

print(f"My neuron: {len(my_spikes)} spikes generated")
print(f"Mean ISI: {my_isi.mean():.2f} ms,  Std ISI: {my_isi.std():.2f} ms")

In [None]:
# Overlay the ISI histogram of my neuron on those of Neurons A and B
fig, ax = plt.subplots(figsize=(9, 5))

# Identical bin edges for every series so the bars are directly comparable
shared_bins = np.arange(0, 25, 0.5)

# (ISIs, colour, opacity, legend label) – drawn in this order, so my neuron ends up on top
overlay_series = [
    (isi_a, '#e74c3c', 0.45, 'Neuron A (Regular)'),
    (isi_b, '#3498db', 0.45, 'Neuron B (Noisy, std=1.5)'),
    (my_isi, '#9b59b6', 0.6, 'My Neuron (std=2)'),
]
for intervals, bar_color, opacity, legend_label in overlay_series:
    ax.hist(intervals, bins=shared_bins, color=bar_color, alpha=opacity,
            label=legend_label, edgecolor='black')

ax.set(xlabel='Inter-spike interval (ms)',
       ylabel='Count',
       title='ISI Comparison – My Synthetic Neuron vs. Original Neurons')
ax.legend()
ax.set_xlim(0, 25)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

**Answers:**

- My synthetic neuron most closely resembles **Neuron B** (regular with noise). Both show a bell-shaped ISI histogram centered around 10 ms.
- The key feature that led me to this conclusion is the Gaussian spread around the mean – unlike Neuron A which is a single sharp bar, and unlike Neuron C which has two separate peaks (bimodal).
- Increasing the standard deviation (e.g. from 1.5 ms to 2 ms) makes the bell curve **wider and flatter**, meaning more variability in the firing intervals. A smaller std would make it narrower and closer to Neuron A's perfectly regular pattern.

---
## Q2: Laptop Price Dataset Analysis

### Import the dataset

In [None]:
# Read the laptop table from the CSV file (path is relative to the working directory)
df = pd.read_csv('laptop_price_-_dataset.csv')

# Report (rows, columns), then let the notebook render the first five rows
print("Shape:", df.shape)
df.head(5)

### Task 1: Plot the price of all laptops

In [None]:
fig, ax = plt.subplots(figsize=(14, 5))

# One bar per laptop, positioned by its row number in the DataFrame;
# width=1.0 makes neighbouring bars touch
laptop_positions = range(len(df))
ax.bar(laptop_positions, df['Price (Euro)'], color='#3498db', width=1.0, alpha=0.7)

ax.set(xlabel='Laptop Index',
       ylabel='Price (Euro)',
       title='Price of All Laptops in the Dataset')
ax.grid(True, axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

### Task 2: Average price by company – which is most expensive?

In [None]:
# Mean price per manufacturer, ranked from most to least expensive
avg_price_per_company = (
    df.groupby('Company')['Price (Euro)']
      .mean()
      .sort_values(ascending=False)
)

# Company with the highest mean price, and that price
top_company = avg_price_per_company.idxmax()
top_avg_price = avg_price_per_company.max()

print("--- Average Laptop Price by Company ---")
print(avg_price_per_company.round(2))
print(f"\nMost expensive on average: {top_company} (€{top_avg_price:.2f})")

In [None]:
fig, ax = plt.subplots(figsize=(12, 5))

# The company with the highest average price is drawn in red, all others in blue
priciest_company = avg_price_per_company.idxmax()
colors = ['#3498db' if company != priciest_company else '#e74c3c'
          for company in avg_price_per_company.index]
bars = ax.bar(avg_price_per_company.index, avg_price_per_company.values,
              color=colors, edgecolor='black', alpha=0.8)

ax.set(xlabel='Company',
       ylabel='Average Price (Euro)',
       title='Average Laptop Price by Company (Red = Most Expensive)')
ax.tick_params(axis='x', rotation=45)
ax.grid(True, axis='y', alpha=0.3)

# Write each bar's value just above it (10 Euro of headroom), centred on the bar
for bar in bars:
    bar_top = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.text(bar_centre, bar_top + 10, f'€{bar_top:.0f}',
            ha='center', va='bottom', fontsize=8)

plt.tight_layout()
plt.show()

### Task 3: Find and clean up Operating Systems

In [None]:
# Tally the raw OpSys labels so the effect of the clean-up is easy to compare afterwards
opsys_counts_before = df['OpSys'].value_counts()

print("--- OpSys values BEFORE cleaning ---")
print(opsys_counts_before)

In [None]:
# "macOS" and "Mac OS X" are the same OS – unify to "macOS"
# "Windows 10 S" is just a variant of Windows 10 – unify to "Windows 10"
# Duplicate label -> canonical label; values not listed here are left unchanged
opsys_aliases = {
    'Mac OS X': 'macOS',
    'Windows 10 S': 'Windows 10',
}
df['OpSys'] = df['OpSys'].replace(opsys_aliases)

# Re-tally after merging the duplicates
opsys_counts_after = df['OpSys'].value_counts()
n_unique_opsys = df['OpSys'].nunique()

print("--- OpSys values AFTER cleaning ---")
print(opsys_counts_after)
print(f"\nTotal unique operating systems: {n_unique_opsys}")

### Task 4: Price distribution for each operating system

In [None]:
# Operating systems ordered from most to least frequent
opsys_sorted = df['OpSys'].value_counts().index.tolist()
n_os = len(opsys_sorted)

# Three panels per row, with as many rows as needed to fit every OS
n_cols = 3
n_rows = int(np.ceil(n_os / n_cols))

fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, n_rows * 3.5))
axes = axes.flatten()  # 1-D view so panels can be addressed in reading order

colors_os = ['#e74c3c', '#3498db', '#2ecc71', '#9b59b6', '#f39c12', '#1abc9c', '#e67e22']

# One histogram per operating system; the palette wraps around if there are more OSes than colours
for i, (panel, os_name) in enumerate(zip(axes, opsys_sorted)):
    subset = df.loc[df['OpSys'] == os_name, 'Price (Euro)']
    panel.hist(subset, bins=25, color=colors_os[i % len(colors_os)],
               edgecolor='black', alpha=0.8)
    panel.set(title=f'{os_name} (n={len(subset)})',
              xlabel='Price (Euro)',
              ylabel='Count')
    panel.grid(True, alpha=0.3)

# Grid cells beyond the last OS stay empty – hide them
for spare_panel in axes[n_os:]:
    spare_panel.set_visible(False)

fig.suptitle('Price Distribution by Operating System', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

### Task 5: RAM vs Price – relationship and outlier detection

In [None]:
# --- Outlier detection using the IQR method ---
# IQR is the spread of the middle half of the prices: Q3 (75th percentile) minus Q1 (25th).
# A price more than 1.5 * IQR below Q1 or above Q3 is flagged as an outlier.
# The rule is percentile-based, so it does not assume prices follow a normal distribution.

prices = df['Price (Euro)']

Q1, Q3 = prices.quantile(0.25), prices.quantile(0.75)
IQR = Q3 - Q1
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# Outliers lie strictly outside the fences; non-outliers lie inside them (fences included)
is_outlier = (prices < lower_bound) | (prices > upper_bound)
is_within_fences = prices.between(lower_bound, upper_bound)

outliers = df[is_outlier]
non_outliers = df[is_within_fences]

print(f"Q1: €{Q1:.2f},  Q3: €{Q3:.2f},  IQR: €{IQR:.2f}")
print(f"Outlier bounds: [€{lower_bound:.2f}, €{upper_bound:.2f}]")
print(f"Outliers found: {len(outliers)} out of {len(df)} laptops")

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

# Normal laptops in blue, outliers in red X marks
ax.scatter(non_outliers['RAM (GB)'], non_outliers['Price (Euro)'],
           color='#3498db', alpha=0.6, edgecolors='black', linewidths=0.5,
           label='Normal', s=40)
ax.scatter(outliers['RAM (GB)'], outliers['Price (Euro)'],
           color='#e74c3c', alpha=0.9, label=f'Outliers (n={len(outliers)})',
           s=80, marker='x', linewidths=2.5)

# Add a linear trend line: degree-1 least-squares fit over ALL laptops (outliers included),
# evaluated on an even grid spanning the observed RAM range
z = np.polyfit(df['RAM (GB)'], df['Price (Euro)'], 1)
p = np.poly1d(z)
x_line = np.linspace(df['RAM (GB)'].min(), df['RAM (GB)'].max(), 100)
ax.plot(x_line, p(x_line), color='#e67e22', linewidth=2, linestyle='--', label='Trend line')

ax.set_xlabel('RAM (GB)')
ax.set_ylabel('Price (Euro)')
ax.set_title('Relationship Between RAM and Laptop Price')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print correlation coefficient
corr = df['RAM (GB)'].corr(df['Price (Euro)'])
print(f"\nCorrelation (RAM vs Price): {corr:.3f}")

# Word the conclusion from the computed coefficient instead of hard-coding it, so the
# printed sentence can never contradict the number above. The cut-offs (0.7 / 0.4 / 0.1)
# are a rule-of-thumb choice, not a formal test.
if not np.isfinite(corr):
    print("-> Correlation is undefined for this data.")
elif abs(corr) < 0.1:
    print("-> No meaningful linear correlation between RAM and price.")
else:
    if abs(corr) >= 0.7:
        strength = 'Strong'
    elif abs(corr) >= 0.4:
        strength = 'Moderate'
    else:
        strength = 'Weak'
    if corr > 0:
        print(f"-> {strength} positive correlation: more RAM generally means a higher price.")
    else:
        print(f"-> {strength} negative correlation: more RAM generally means a lower price.")

### Task 6: Extract Storage Type from Memory column

In [None]:
# The Memory column has entries like "128GB SSD", "500GB HDD", "128GB SSD +  1TB HDD", "32GB Flash Storage"
# We extract just the storage TYPE (SSD, HDD, Flash Storage, Hybrid, or any combination of them)

def extract_storage_type(memory_str):
    """Return the storage type(s) named in a Memory entry.

    Every known type whose name occurs in the text is reported, so a machine with
    two different drives keeps both of them (e.g. "Flash Storage + HDD") instead of
    losing one to the order in which the types are checked. Capacities and repeated
    drives of the same type are ignored.

    Parameters
    ----------
    memory_str : object
        Value from the Memory column; it is converted with ``str()`` first, so
        non-string values are accepted.

    Returns
    -------
    str
        The matching type names joined with " + " in a fixed order
        (Flash Storage, SSD, Hybrid, HDD), or "Unknown" if none is present.
    """
    memory_str = str(memory_str)
    # The fixed order keeps the combined labels stable ("SSD + HDD", never "HDD + SSD")
    known_types = ('Flash Storage', 'SSD', 'Hybrid', 'HDD')
    found_types = [storage for storage in known_types if storage in memory_str]
    if not found_types:
        return 'Unknown'
    return ' + '.join(found_types)

# Classify every Memory entry and store the result as a new column
df['Storage type'] = df['Memory'].map(extract_storage_type)

# How many laptops fall into each storage category
storage_type_counts = df['Storage type'].value_counts()

print("--- Storage Type Distribution ---")
print(storage_type_counts)

In [None]:
# Sanity check: list each distinct (Memory, Storage type) pair once, grouped by the
# extracted type, so wrong classifications are easy to spot
memory_examples = (
    df[['Memory', 'Storage type']]
    .drop_duplicates()
    .sort_values('Storage type')
)

print("--- Examples: Memory  →  Storage type ---")
print(memory_examples.to_string())

### Final check – DataFrame with the new column

In [None]:
# First 15 rows, limited to the columns that show the new 'Storage type' next to its source
preview_columns = ['Company', 'Product', 'Memory', 'Storage type', 'Price (Euro)']
df[preview_columns].head(15)