In [3]:
import re
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from wordcloud import STOPWORDS, WordCloud

# Plot appearance. The call order is kept as-is: style sheet first, then the
# seaborn colour palette.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("Set2")

# Load the table of included studies and build one combined text field per
# paper: title, a single space, then abstract. Missing titles/abstracts are
# treated as empty strings so the concatenation never yields NaN.
df = pd.read_csv('DissertationIncluded.csv', encoding='utf-8')
title_text = df['title'].fillna('')
abstract_text = df['abstract'].fillna('')
df['text'] = title_text + ' ' + abstract_text

# Report the corpus size and the span of publication years found in the data.
print(f"Analyzing {len(df)} papers from {df['year'].min()} to {df['year'].max()}")

# ============================================================================
# VISUALIZATION 1: Publications Over Time
# ============================================================================
# Bar chart of the number of included papers per publication year, saved to
# temporal_trend.png.
fig, ax = plt.subplots(figsize=(10, 6))
year_counts = df['year'].value_counts().sort_index()

bars = ax.bar(year_counts.index, year_counts.values,
              color='steelblue', edgecolor='black', alpha=0.7)
ax.set_xlabel('Year', fontsize=12, fontweight='bold')
ax.set_ylabel('Number of Publications', fontsize=12, fontweight='bold')

# Take the year span shown in the title from the plotted data instead of a
# hard-coded "(2018-2026)", so the heading cannot drift from the bars.
# An empty table simply gets the title without a span.
trend_title = 'Publication Trend: Blockchain in EV Charging'
if not year_counts.empty:
    trend_title += f' ({year_counts.index.min()}-{year_counts.index.max()})'
ax.set_title(trend_title, fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# Annotate every bar with its count (integer format) just above the bar.
ax.bar_label(bars, fmt='%d', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.savefig('temporal_trend.png', dpi=300, bbox_inches='tight')
print("✓ Saved: temporal_trend.png")
plt.close()

# ============================================================================
# VISUALIZATION 2: Research Themes
# ============================================================================
# Horizontal bar chart of how many studies address each research theme,
# saved to research_themes.png. The counts are fixed values entered here;
# they sum to more than the number of studies, so the percentages below are
# "share of studies mentioning the theme" and do not add up to 100%.
themes_data = {
    'Security & Privacy': 62,
    'Payment &\nTransactions': 55,
    'Scalability &\nPerformance': 43,
    'Energy Trading': 42,
    'Sustainability': 35,
    'Smart Contracts': 31,
    'IoT Integration': 21,
    'V2G/V2V\nIntegration': 15
}

# Single source for the study total used in both the title ("n=") and the
# percentage denominator, taken from the loaded table rather than a literal
# repeated in two places.
total_studies = len(df)
theme_counts = list(themes_data.values())

fig, ax = plt.subplots(figsize=(10, 6))
y_pos = np.arange(len(themes_data))
bars = ax.barh(y_pos, theme_counts,
               color='coral', edgecolor='black', alpha=0.7)
ax.set_yticks(y_pos)
ax.set_yticklabels(list(themes_data.keys()), fontsize=10)
ax.set_xlabel('Number of Studies', fontsize=12, fontweight='bold')
ax.set_title(
    f'Research Themes in Blockchain-EV Charging Studies (n={total_studies})',
    fontsize=14, fontweight='bold')
ax.grid(axis='x', alpha=0.3)

# Label each bar with "count (share%)". The guard keeps an empty table from
# raising ZeroDivisionError.
theme_shares = [
    count / total_studies * 100 if total_studies else 0.0
    for count in theme_counts
]
ax.bar_label(
    bars,
    labels=[f'{count} ({share:.1f}%)'
            for count, share in zip(theme_counts, theme_shares)],
    padding=4, fontsize=9, fontweight='bold')

plt.tight_layout()
plt.savefig('research_themes.png', dpi=300, bbox_inches='tight')
print("✓ Saved: research_themes.png")
plt.close()

# ============================================================================
# VISUALIZATION 3: Application Focus Pie Chart
# ============================================================================
# Pie chart of the application areas covered by the studies, saved to
# application_focus.png. Wedge percentages come from autopct, i.e. each
# value's share of the sum of the values listed here.
applications = {
    'Payment Systems': 39,
    'Charging Infrastructure': 33,
    'Energy Trading': 32,
    'Supply Chain': 13,
    'Authentication': 10,
    'Traffic Management': 9,
    'V2G Operations': 8
}

# One fill colour per application, in the same order as the dictionary.
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99',
          '#ff99cc', '#c2c2f0', '#ffb3e6']

fig, ax = plt.subplots(figsize=(10, 8))
wedges, texts, autotexts = ax.pie(
    list(applications.values()),
    labels=list(applications.keys()),
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
    textprops={'fontsize': 11},
)
ax.set_title('Application Focus Distribution in Studies',
             fontsize=14, fontweight='bold', pad=20)

# Restyle the in-wedge percentage labels: white, bold, size 10.
plt.setp(autotexts, color='white', fontweight='bold', fontsize=10)

plt.tight_layout()
plt.savefig('application_focus.png', dpi=300, bbox_inches='tight')
print("✓ Saved: application_focus.png")
plt.close()

# ============================================================================
# VISUALIZATION 4: Word Cloud from Titles and Abstracts
# ============================================================================
# Combine the per-paper text (title + abstract) into one corpus string.
all_text = ' '.join(df['text'].dropna())

# Extra function words to drop from the cloud.
custom_stopwords = {
    'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at',
    'to', 'for', 'of', 'with', 'by', 'from', 'as', 'is',
    'was', 'are', 'been', 'be', 'this', 'that', 'these',
    'those', 'can', 'could', 'would', 'should', 'may',
    'might', 'will', 'shall',
}

# Passing `stopwords=` to WordCloud replaces its built-in English list rather
# than extending it. Merge the two so common words missing from the short
# custom list (e.g. "it", "we", "which") are filtered as well.
stopwords = set(STOPWORDS) | custom_stopwords

# Create word cloud
wordcloud = WordCloud(width=1600, height=800,
                      background_color='white',
                      stopwords=stopwords,
                      colormap='viridis',
                      max_words=100,
                      relative_scaling=0.5,
                      min_font_size=10).generate(all_text)

# Render the generated image without axes and save it to wordcloud.png.
fig, ax = plt.subplots(figsize=(16, 8))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
ax.set_title('Key Terms in Blockchain-EV Charging Literature',
             fontsize=16, fontweight='bold', pad=20)
plt.tight_layout()
plt.savefig('wordcloud.png', dpi=300, bbox_inches='tight')
print("✓ Saved: wordcloud.png")
plt.close()

# ============================================================================
# VISUALIZATION 5: Research Methods
# ============================================================================
# Horizontal bar chart of the research methods used by the studies, saved to
# research_methods.png. The counts are fixed values entered here; they sum to
# more than the number of studies, so the percentages are "share of studies
# using the method" and do not add up to 100%.
methods = {
    'Theoretical/\nConceptual': 70,
    'Experimental': 27,
    'Prototype/\nImplementation': 20,
    'Simulation': 14,
    'Survey/Review': 11,
    'Case Study': 2
}

# Single source for the study total used in both the title ("n=") and the
# percentage denominator, taken from the loaded table rather than a literal
# repeated in two places.
total_studies = len(df)
method_counts = list(methods.values())

fig, ax = plt.subplots(figsize=(10, 6))
y_pos = np.arange(len(methods))
bars = ax.barh(y_pos, method_counts,
               color='lightgreen', edgecolor='black', alpha=0.7)
ax.set_yticks(y_pos)
ax.set_yticklabels(list(methods.keys()), fontsize=10)
ax.set_xlabel('Number of Studies', fontsize=12, fontweight='bold')
ax.set_title(f'Research Methods Used (n={total_studies})',
             fontsize=14, fontweight='bold')
ax.grid(axis='x', alpha=0.3)

# Label each bar with "count (share%)". The guard keeps an empty table from
# raising ZeroDivisionError.
method_shares = [
    count / total_studies * 100 if total_studies else 0.0
    for count in method_counts
]
ax.bar_label(
    bars,
    labels=[f'{count} ({share:.1f}%)'
            for count, share in zip(method_counts, method_shares)],
    padding=4, fontsize=9, fontweight='bold')

plt.tight_layout()
plt.savefig('research_methods.png', dpi=300, bbox_inches='tight')
print("✓ Saved: research_methods.png")
plt.close()

# ============================================================================
# VISUALIZATION 6: Key Challenges
# ============================================================================
# Vertical bar chart of the challenges reported in the literature, saved to
# key_challenges.png. Counts are fixed values entered here.
challenges = {
    'Latency/\nReal-time': 21,
    'Scalability': 17,
    'Energy\nConsumption': 15,
    'Security\nThreats': 11,
    'Regulatory\nIssues': 10,
    'Privacy\nConcerns': 5,
    'Interoperability': 4,
    'User Adoption': 1
}

challenge_labels = list(challenges.keys())
challenge_counts = list(challenges.values())
x_positions = range(len(challenges))  # one slot per challenge, in dict order

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(x_positions, challenge_counts,
              color='salmon', edgecolor='black', alpha=0.7)
ax.set_xticks(x_positions)
ax.set_xticklabels(challenge_labels, rotation=45, ha='right', fontsize=10)
ax.set_ylabel('Number of Studies', fontsize=12, fontweight='bold')
ax.set_title('Key Challenges Identified in Literature',
             fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# Write each bar's count centred just above its top edge.
for rect in bars:
    bar_top = rect.get_height()
    bar_centre = rect.get_x() + rect.get_width() / 2
    ax.text(bar_centre, bar_top, str(int(bar_top)),
            ha='center', va='bottom', fontsize=9, fontweight='bold')

plt.tight_layout()
plt.savefig('key_challenges.png', dpi=300, bbox_inches='tight')
print("✓ Saved: key_challenges.png")
plt.close()

# ============================================================================
# VISUALIZATION 7: Combined Dashboard
# ============================================================================
# A 2x3 grid summarising the analysis: five reduced charts (A-E) plus a text
# panel (F), saved to dashboard_combined.png.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
ax1, ax2, ax3, ax4, ax5, ax6 = axes.flat
fig.suptitle('Systematic Review Analysis Dashboard: Blockchain in EV Charging',
             fontsize=18, fontweight='bold', y=0.98)

# Subplot 1: Temporal trend
year_counts = df['year'].value_counts().sort_index()
ax1.bar(year_counts.index, year_counts.values, color='steelblue',
        edgecolor='black', alpha=0.7)
ax1.set_xlabel('Year', fontsize=10, fontweight='bold')
ax1.set_ylabel('Publications', fontsize=10, fontweight='bold')
ax1.set_title('A. Temporal Distribution', fontsize=11, fontweight='bold')
ax1.grid(axis='y', alpha=0.3)

# Subplot 2: Top themes
top_themes = {'Security': 62, 'Payment': 55, 'Scalability': 43,
              'Trading': 42, 'Sustainability': 35}
y_pos = np.arange(len(top_themes))
ax2.barh(y_pos, list(top_themes.values()), color='coral',
         edgecolor='black', alpha=0.7)
ax2.set_yticks(y_pos)
ax2.set_yticklabels(list(top_themes.keys()), fontsize=9)
ax2.set_xlabel('Studies', fontsize=10, fontweight='bold')
ax2.set_title('B. Top 5 Research Themes', fontsize=11, fontweight='bold')
ax2.grid(axis='x', alpha=0.3)

# Subplot 3: Application focus
# Note: autopct shows each slice as a share of the four values plotted here,
# so these percentages differ from the share-of-all-studies figures in the
# summary panel.
apps = {'Payment': 39, 'Charging': 33, 'Trading': 32, 'Supply Chain': 13}
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99']
ax3.pie(list(apps.values()), labels=list(apps.keys()), autopct='%1.1f%%',
        colors=colors, startangle=90, textprops={'fontsize': 9})
ax3.set_title('C. Application Focus', fontsize=11, fontweight='bold')

# Subplot 4: Research methods
methods_short = {'Theoretical': 70, 'Experimental': 27, 'Prototype': 20,
                 'Simulation': 14}
y_pos = np.arange(len(methods_short))
ax4.barh(y_pos, list(methods_short.values()), color='lightgreen',
         edgecolor='black', alpha=0.7)
ax4.set_yticks(y_pos)
ax4.set_yticklabels(list(methods_short.keys()), fontsize=9)
ax4.set_xlabel('Studies', fontsize=10, fontweight='bold')
ax4.set_title('D. Research Methods', fontsize=11, fontweight='bold')
ax4.grid(axis='x', alpha=0.3)

# Subplot 5: Challenges
chall = {'Latency': 21, 'Scalability': 17, 'Energy': 15, 'Security': 11}
ax5.bar(range(len(chall)), list(chall.values()), color='salmon',
        edgecolor='black', alpha=0.7)
ax5.set_xticks(range(len(chall)))
ax5.set_xticklabels(list(chall.keys()), fontsize=9)
ax5.set_ylabel('Studies', fontsize=10, fontweight='bold')
ax5.set_title('E. Key Challenges', fontsize=11, fontweight='bold')
ax5.grid(axis='y', alpha=0.3)

# Subplot 6: Summary statistics
# Figures that can be derived from the loaded table or from the panel data
# above are computed here, so the text cannot drift from panels A-E.
ax6.axis('off')
total_studies = len(df)


def _pct_of_studies(count):
    """Return `count` as a percentage of all loaded studies (0.0 if none)."""
    return count / total_studies * 100 if total_studies else 0.0


if year_counts.empty:
    # No usable years: show placeholders instead of raising on min/idxmax.
    period_text = 'n/a'
    peak_text = 'n/a'
else:
    period_text = f'{year_counts.index.min()}-{year_counts.index.max()}'
    peak_text = f'{year_counts.idxmax()} ({year_counts.max()} papers)'

top_theme_count = top_themes['Security']
top_app_count = apps['Payment']
top_challenge_count = chall['Latency']

# NOTE(review): the author statistics and the 2024-26 share below are
# hard-coded values that this block does not recompute - confirm them
# against the CSV whenever the data changes.
summary_text = f"""
SUMMARY STATISTICS

Total Studies: {total_studies}
Time Period: {period_text}
Peak Year: {peak_text}

Average Authors: 4.98
Most Common: 6 authors

Recent Surge (2024-26): 48.1%

Top Theme: Security & Privacy
   → {top_theme_count} studies ({_pct_of_studies(top_theme_count):.1f}%)

Top Application: Payments
   → {top_app_count} studies ({_pct_of_studies(top_app_count):.1f}%)

Top Challenge: Latency
   → {top_challenge_count} studies ({_pct_of_studies(top_challenge_count):.1f}%)
"""
ax6.text(0.1, 0.95, summary_text, transform=ax6.transAxes,
         fontsize=10, verticalalignment='top',
         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
         family='monospace')
ax6.set_title('F. Summary Statistics', fontsize=11, fontweight='bold')

plt.tight_layout()
plt.savefig('dashboard_combined.png', dpi=300, bbox_inches='tight')
print("✓ Saved: dashboard_combined.png")
plt.close()

# Closing report: a banner followed by a numbered list of every image file
# written by the sections above, in the order they were produced.
banner_rule = "=" * 60
generated_files = [
    "temporal_trend.png",
    "research_themes.png",
    "application_focus.png",
    "wordcloud.png",
    "research_methods.png",
    "key_challenges.png",
    "dashboard_combined.png",
]

print("\n" + banner_rule)
print("ALL VISUALIZATIONS CREATED SUCCESSFULLY!")
print(banner_rule)
print("\nGenerated files:")
for position, filename in enumerate(generated_files, start=1):
    print(f"{position}. {filename}")

Analyzing 81 papers from 2018 to 2026
✓ Saved: temporal_trend.png
✓ Saved: research_themes.png
✓ Saved: application_focus.png
✓ Saved: wordcloud.png
✓ Saved: research_methods.png
✓ Saved: key_challenges.png
✓ Saved: dashboard_combined.png

ALL VISUALIZATIONS CREATED SUCCESSFULLY!

Generated files:
1. temporal_trend.png
2. research_themes.png
3. application_focus.png
4. wordcloud.png
5. research_methods.png
6. key_challenges.png
7. dashboard_combined.png
