In [1]:
import pandas as pd
import re

df = pd.read_csv('DissertationIncluded.csv', encoding='utf-8')

# Combine title and abstract
df['text'] = df['title'].fillna('') + ' ' + df['abstract'].fillna('')
df['text'] = df['text'].str.lower()

# Define themes with keywords
themes = {
    'Transparency_Mechanisms': [
        'transparency', 'transparent', 'audit', 'traceability', 
        'immutable', 'tamper-proof', 'verifiable'
    ],
    'Efficiency_Improvements': [
        'efficiency', 'efficient', 'optimize', 'optimization', 
        'cost reduction', 'time saving', 'resource utilization'
    ],
    'Interoperability': [
        'interoperability', 'interoperable', 'standardization', 
        'compatibility', 'cross-platform', 'integration'
    ],
    'Security_Privacy': [
        'security', 'secure', 'privacy', 'authentication', 
        'encryption', 'cryptography', 'attack', 'vulnerability'
    ],
    'Smart_Contracts': [
        'smart contract', 'decentralized application', 'dapp',
        'ethereum', 'solidity', 'chaincode'
    ],
    'V2G_V2V': [
        'vehicle-to-grid', 'v2g', 'vehicle-to-vehicle', 'v2v',
        'grid integration', 'bidirectional charging'
    ],
    'Energy_Trading': [
        'energy trading', 'peer-to-peer', 'p2p', 'energy market',
        'electricity trading', 'prosumer'
    ],
    'Payment_Systems': [
        'payment', 'transaction', 'billing', 'cryptocurrency',
        'token', 'micropayment', 'roaming'
    ],
    'Scalability': [
        'scalability', 'scalable', 'throughput', 'performance',
        'sharding', 'layer 2', 'off-chain'
    ],
    'Sustainability': [
        'sustainability', 'sustainable', 'renewable', 'carbon',
        'green', 'environmental', 'emission'
    ]
}

# Code papers
for theme_name, keywords in themes.items():
    df[theme_name] = df['text'].apply(
        lambda x: 1 if any(kw in x for kw in keywords) else 0
    )

# Calculate theme frequencies
theme_counts = {}
for theme in themes.keys():
    count = df[theme].sum()
    percentage = (count / len(df)) * 100
    theme_counts[theme] = {'count': count, 'percentage': percentage}

# Display results
print("THEMATIC ANALYSIS RESULTS")
print("="*60)
for theme, stats in sorted(theme_counts.items(), 
                           key=lambda x: x[1]['count'], 
                           reverse=True):
    print(f"{theme.replace('_', ' ')}: {stats['count']} papers ({stats['percentage']:.1f}%)")

# Save coded data
df.to_csv('papers_coded_by_theme.csv', index=False)
print("\n✓ Saved: papers_coded_by_theme.csv")

# Create theme co-occurrence matrix
theme_cols = list(themes.keys())
cooccurrence = df[theme_cols].T.dot(df[theme_cols])
cooccurrence.to_csv('theme_cooccurrence.csv')
print("✓ Saved: theme_cooccurrence.csv")

THEMATIC ANALYSIS RESULTS
Efficiency Improvements: 65 papers (80.2%)
Security Privacy: 64 papers (79.0%)
Sustainability: 42 papers (51.9%)
Transparency Mechanisms: 40 papers (49.4%)
Payment Systems: 40 papers (49.4%)
Scalability: 37 papers (45.7%)
Energy Trading: 35 papers (43.2%)
Smart Contracts: 33 papers (40.7%)
Interoperability: 23 papers (28.4%)
V2G V2V: 16 papers (19.8%)

✓ Saved: papers_coded_by_theme.csv
✓ Saved: theme_cooccurrence.csv
