# 1. Left-Right Orientation

In [30]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from openpyxl import load_workbook
import numpy as np

# Configuration
# Workbook that load_data() opens; it must contain a 'RowHeaders' sheet.
input_file = 'Cleaned_Survey_Data1-LeftRight.xlsx'
# Directory used by generate_plots() to build the plot file names.
output_folder = 'plots'

# Ensure plots directory exists (exist_ok=True makes re-running this cell harmless)
os.makedirs(output_folder, exist_ok=True)

def load_data():
    """Load the row labels and every country sheet from ``input_file``.

    The workbook is opened a single time and the same handle is reused for
    every sheet, instead of re-opening the file for each ``read_excel`` call.

    Returns:
        dict: Maps each country code (the sheet name up to its first
        underscore) to ``{'data': DataFrame, 'headers': list}``. When a sheet
        and the row labels differ in length, both are truncated to the
        shorter length and a warning is printed.

    Raises:
        ValueError: If the workbook has no 'RowHeaders' sheet.
    """
    with pd.ExcelFile(input_file) as workbook:
        sheets = workbook.sheet_names

        if 'RowHeaders' not in sheets:
            raise ValueError("RowHeaders sheet not found in the Excel file")

        # The first entry of the label column is the 'Year' row, which has no
        # counterpart in the data rows, so it is dropped.
        row_headers = pd.read_excel(
            workbook, sheet_name='RowHeaders', header=None
        )[0].tolist()[1:]

        country_data = {}
        for sheet in sheets:
            if sheet == 'RowHeaders':
                continue

            country_code = sheet.split('_')[0]
            df = pd.read_excel(workbook, sheet_name=sheet)

            # Keep every data row; only normalise the index to 0..n-1 so that
            # positional lookups line up with the header list.
            df.reset_index(drop=True, inplace=True)

            if len(df) != len(row_headers):
                print(f"Warning: Data length mismatch in {sheet} ({len(df)} rows vs {len(row_headers)} headers)")
                min_len = min(len(df), len(row_headers))
                df = df.iloc[:min_len]
                current_headers = row_headers[:min_len]
            else:
                current_headers = row_headers

            country_data[country_code] = {
                'data': df,
                'headers': current_headers,
            }

    return country_data

# (marker, label) pairs checked in order; the first marker found in a header wins.
_LABEL_MARKERS = tuple((f'{n}(', f'{n}%') for n in range(1, 11)) + (
    ('Refus', 'Refusal%'),
    ('DK', "Don't Know%"),
)


def _short_label(header):
    """Return the compact x-axis label for one percentage row header."""
    for marker, label in _LABEL_MARKERS:
        if marker in header:
            return label
    # Unknown headers are shown unchanged.
    return header


def _as_percentage(raw):
    """Scale a cell value by 100, mapping unusable or missing cells to 0.0."""
    try:
        fraction = float(raw)
    except (ValueError, TypeError):
        return 0.0
    # pandas reports empty cells as NaN, which float() accepts without error;
    # without this check they would be drawn as NaN bars labelled "nan%".
    if np.isnan(fraction):
        return 0.0
    return fraction * 100


def generate_plots(country_data, save=False):
    """Draw one bar chart of response percentages per country and year.

    Args:
        country_data: Mapping of country code to a dict with the keys
            'data' (a DataFrame whose digit-named columns are treated as
            years) and 'headers' (row labels aligned positionally with the
            DataFrame rows), in the shape returned by load_data().
        save: When True, each figure is written to
            ``<output_folder>/<country>_<year>.png``. Defaults to False,
            which keeps the earlier behaviour where saving was disabled.

    The "Created plot" line is printed for every figure regardless of
    ``save`` so that the console output stays the same as before.
    """
    for country_code, data_dict in country_data.items():
        df = data_dict['data']
        headers = data_dict['headers']

        # Year columns: every column with an all-digit name except 'Row_Headers'.
        years = [col for col in df.columns if col != 'Row_Headers' and str(col).isdigit()]

        # Only rows whose label contains '%' are plotted.
        pct_indices = [i for i, h in enumerate(headers) if isinstance(h, str) and '%' in h]
        # Labels depend only on the headers, so build them once per country.
        x_labels = [_short_label(headers[i]) for i in pct_indices]

        for year in years:
            plt.figure(figsize=(12, 6))

            column = df[year]
            values = np.array([_as_percentage(column.iloc[i]) for i in pct_indices])

            bars = plt.bar(x_labels, values)

            # Print each bar's value just above it.
            for bar in bars:
                height = bar.get_height()
                plt.text(bar.get_x() + bar.get_width()/2., height,
                         f'{height:.1f}%',
                         ha='center', va='bottom')

            plt.title(f'{country_code} - {year}\nResponse Distribution')
            plt.xlabel('Response Category')
            plt.ylabel('Percentage of Respondents')
            plt.ylim(0, 100)
            plt.xticks(rotation=45)
            plt.tight_layout()

            plot_filename = f"{output_folder}/{country_code}_{year}.png"
            if save:
                plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
            # Always release the figure so figures do not accumulate in memory.
            plt.close()

            print(f"Created plot: {plot_filename}")

# Main execution
# Runs the two stages in order: read the workbook, then draw one chart per
# country and year. `country_data` is bound at module level here.
if __name__ == '__main__':
    print("Loading data...")
    country_data = load_data()
    print("Generating plots...")
    generate_plots(country_data)
    
    print("All plots generated successfully!")

Loading data...
Generating plots...
Created plot: plots/BE-LeftRight_2007.png
Created plot: plots/BE-LeftRight_2010.png
Created plot: plots/BE-LeftRight_2012.png
Created plot: plots/BE-LeftRight_2014.png
Created plot: plots/BE-LeftRight_2015.png
Created plot: plots/BE-LeftRight_2016.png
Created plot: plots/BE-LeftRight_2017.png
Created plot: plots/BE-LeftRight_2019.png
Created plot: plots/BG-LeftRight_2007.png
Created plot: plots/BG-LeftRight_2010.png
Created plot: plots/BG-LeftRight_2012.png
Created plot: plots/BG-LeftRight_2014.png
Created plot: plots/BG-LeftRight_2015.png
Created plot: plots/BG-LeftRight_2016.png
Created plot: plots/BG-LeftRight_2017.png
Created plot: plots/BG-LeftRight_2019.png
Created plot: plots/CZ-LeftRight_2007.png
Created plot: plots/CZ-LeftRight_2010.png
Created plot: plots/CZ-LeftRight_2012.png
Created plot: plots/CZ-LeftRight_2014.png
Created plot: plots/CZ-LeftRight_2015.png
Created plot: plots/CZ-LeftRight_2016.png
Created plot: plots/CZ-LeftRight_2017.pn

Created plot: plots/SI-LeftRight_2012.png
Created plot: plots/SI-LeftRight_2014.png
Created plot: plots/SI-LeftRight_2015.png
Created plot: plots/SI-LeftRight_2016.png
Created plot: plots/SI-LeftRight_2017.png
Created plot: plots/SI-LeftRight_2019.png
Created plot: plots/SK-LeftRight_2007.png
Created plot: plots/SK-LeftRight_2010.png
Created plot: plots/SK-LeftRight_2012.png
Created plot: plots/SK-LeftRight_2014.png
Created plot: plots/SK-LeftRight_2015.png
Created plot: plots/SK-LeftRight_2016.png
Created plot: plots/SK-LeftRight_2017.png
Created plot: plots/SK-LeftRight_2019.png
Created plot: plots/FI-LeftRight_2007.png
Created plot: plots/FI-LeftRight_2010.png
Created plot: plots/FI-LeftRight_2012.png
Created plot: plots/FI-LeftRight_2014.png
Created plot: plots/FI-LeftRight_2015.png
Created plot: plots/FI-LeftRight_2016.png
Created plot: plots/FI-LeftRight_2017.png
Created plot: plots/FI-LeftRight_2019.png
Created plot: plots/SE-LeftRight_2007.png
Created plot: plots/SE-LeftRight_2

# 2. EP Image

In [21]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from openpyxl import load_workbook
import numpy as np

# Configuration
# Folder searched by process_year_file() for one "<year>.xls" file per year.
raw_data_folder = 'RawData'
# Workbook whose 'EP_Image' sheet lists, per year, the sheet to read.
mapping_file = 'Sheet_Mappings.xlsx'
output_folder = 'plots/EP_Image'
question_name = 'EP_Image'  # Used for output filenames

# Ensure plots directory exists
os.makedirs(output_folder, exist_ok=True)

# Load sheet mappings
# Read at module level, so the mapping workbook must exist when this cell runs.
# The sheet is expected to provide 'Year' and 'Sheet' columns.
sheet_mappings = pd.read_excel(mapping_file, sheet_name='EP_Image')
year_to_sheet = dict(zip(sheet_mappings['Year'], sheet_mappings['Sheet']))

# Define expected row headers (based on your example)
# process_year_file() only uses the LENGTH of this list, to size the block of
# rows it extracts starting at the 'TOTAL' row.
# NOTE(review): the entries appear to alternate French and English labels,
# which would explain the repeated 'Positive' - confirm against the raw sheets.
row_headers = [
    'TOTAL',
    'Très positive', 'Very positive',
    'Assez positive', 'Fairly positive',
    'Neutre', 'Neutral',
    'Assez négative', 'Fairly negative',
    'Très négative', 'Very negative',
    'NSP', 'DK',
    'Positive', 'Positive',
    'Négative', 'Negative'
]

# Country codes (from your example)
# Used both to locate the header row and to select the columns to extract.
country_columns = ['BE', 'BG', 'CZ', 'DK', 'D-W', 'DE', 'D-E', 'EE', 'EL', 'ES', 
                  'FR', 'IE', 'IT', 'CY', 'LV', 'LT', 'LU', 'HU', 'MT', 'NL', 
                  'AT', 'PL', 'PT', 'RO', 'SI', 'SK', 'FI', 'SE', 'UK']

def _parse_percentage(cell):
    """Turn one spreadsheet cell into a float, falling back to 0.0.

    A '%' sign is dropped and a decimal comma is accepted. Anything that is
    not made of digits and dots (blank cells, text, negative numbers) gives 0.0.
    """
    text = str(cell).strip().replace('%', '').replace(',', '.')
    if not text.replace('.', '').isdigit():
        return 0.0
    try:
        return float(text)
    except ValueError:
        # e.g. '1.2.3' passes the digit check above but is not a number
        return 0.0


def process_year_file(year, sheet_name):
    """Extract the per-country response values from one year's raw file.

    Args:
        year: Year used to build the file name ``<raw_data_folder>/<year>.xls``
            and to label the printed messages.
        sheet_name: Name of the sheet to read inside that file.

    Returns:
        dict | None: Maps each country code found among the sheet's columns to
        a dict with the keys 'Very positive', 'Fairly positive', 'Neutral',
        'Fairly negative', 'Very negative' and "Don't know". Returns None when
        the file is missing, the header or 'TOTAL' row cannot be located, or
        any error occurs while reading (a message is printed in each case).
    """
    file_path = os.path.join(raw_data_folder, f"{year}.xls")
    if not os.path.exists(file_path):
        print(f"Warning: File not found for year {year}")
        return None

    # Output keys, in the order the values are taken from the extracted rows.
    categories = (
        'Very positive',
        'Fairly positive',
        'Neutral',
        'Fairly negative',
        'Very negative',
        "Don't know",
    )

    try:
        # First pass without a header, only to find which row holds the
        # country codes.
        df = pd.read_excel(file_path, sheet_name=sheet_name, header=None)

        header_row = None
        for i in range(min(20, len(df))):  # Check first 20 rows
            if any(str(col).strip() in country_columns for col in df.iloc[i].values):
                header_row = i
                break

        if header_row is None:
            print(f"Could not find country headers in {year} file")
            return None

        # Second pass with that row as the header, so columns are addressable
        # by country code.
        df = pd.read_excel(file_path, sheet_name=sheet_name, header=header_row)
        df.columns = [str(col).strip() for col in df.columns]

        # The data block starts at the row whose second column reads 'TOTAL'.
        data_start = None
        for i in range(0, min(20, len(df))):
            if str(df.iloc[i, 1]).strip() == 'TOTAL':
                data_start = i
                break

        if data_start is None:
            print(f"Could not find data start row in {year} file")
            return None

        data_block = df.iloc[data_start:data_start+len(row_headers)]

        results = {}
        for country in country_columns:
            if country not in data_block.columns:
                continue

            values = data_block[country].reset_index(drop=True)

            # Every second row starting at position 1 is used.
            # NOTE(review): these are assumed to be the percentage rows -
            # confirm against the raw sheet layout.
            pct_values = [_parse_percentage(cell) for cell in values.iloc[1::2]]

            # Indexing (rather than zip) keeps the IndexError for a block that
            # is too short, which the handler below reports for the year.
            results[country] = {
                name: pct_values[position]
                for position, name in enumerate(categories)
            }

        return results

    except Exception as e:
        # Best-effort boundary: one unreadable year must not stop the others.
        print(f"Error processing {year} file: {str(e)}")
        return None

def plot_country_data(country_code, year_data):
    """Draw and save a stacked bar chart of one country's answers per year.

    Args:
        country_code: Code used in the chart title and the output file name.
        year_data: Mapping of year to a dict of category name -> value. A
            category that is absent for a year is plotted as 0.

    The figure is written to ``<output_folder>/<question_name>_<country_code>.png``
    and a confirmation line is printed.
    """
    # Category -> bar colour; dict order is the stacking order (bottom first).
    palette = {
        'Very positive': '#4CAF50',
        'Fairly positive': '#8BC34A',
        'Neutral': '#FFC107',
        'Fairly negative': '#FF9800',
        'Very negative': '#F44336',
        "Don't know": '#9E9E9E',
    }
    ordered_years = sorted(year_data.keys())

    # One list of values per category, aligned with ordered_years.
    series = {
        name: [year_data[yr].get(name, 0) for yr in ordered_years]
        for name in palette
    }

    fig, ax = plt.subplots(figsize=(12, 6))

    # Running top of the stack for each year; every layer starts where the
    # previous one ended.
    stack_base = np.zeros(len(ordered_years))
    for name, shade in palette.items():
        heights = series[name]
        ax.bar(ordered_years, heights, bottom=stack_base, label=name, color=shade)
        stack_base += np.array(heights)

    ax.set_title(f"European Parliament Image - {country_code}")
    ax.set_xlabel('Year')
    ax.set_ylabel('Number of Respondents')
    # No fixed y-limit is applied; matplotlib autoscales the axis.
    ax.legend(loc='upper right', bbox_to_anchor=(1.15, 1))
    plt.xticks(np.arange(2006,2020), rotation=45)
    plt.tight_layout()

    plot_filename = f"{output_folder}/{question_name}_{country_code}.png"
    plt.savefig(plot_filename, dpi=300, bbox_inches='tight')
    plt.close()

    print(f"Created plot: {plot_filename}")

def main():
    """Read every mapped year, regroup the results by country and plot them.

    Years for which process_year_file() returns nothing (None or an empty
    dict) are skipped. One chart is produced per country that appeared in at
    least one year.
    """
    by_country = {}
    for year, sheet_name in year_to_sheet.items():
        print(f"Processing year {year}...")
        parsed = process_year_file(year, sheet_name)
        if not parsed:
            continue
        # Invert year -> country -> data into country -> year -> data.
        for country, data in parsed.items():
            by_country.setdefault(country, {})[year] = data

    for country_code in by_country:
        plot_country_data(country_code, by_country[country_code])

    print("All plots generated successfully!")

# Run the EP image pipeline only when this code is executed directly,
# not when it is imported.
if __name__ == '__main__':
    main()

Processing year 2007...
Processing year 2011...
Processing year 2012...
Processing year 2014...
Processing year 2015...
Processing year 2016...
Processing year 2017...
Processing year 2019...
Created plot: plots/EP_Image/EP_Image_BE.png
Created plot: plots/EP_Image/EP_Image_BG.png
Created plot: plots/EP_Image/EP_Image_CZ.png
Created plot: plots/EP_Image/EP_Image_DK.png
Created plot: plots/EP_Image/EP_Image_D-W.png
Created plot: plots/EP_Image/EP_Image_DE.png
Created plot: plots/EP_Image/EP_Image_D-E.png
Created plot: plots/EP_Image/EP_Image_EE.png
Created plot: plots/EP_Image/EP_Image_EL.png
Created plot: plots/EP_Image/EP_Image_ES.png
Created plot: plots/EP_Image/EP_Image_FR.png
Created plot: plots/EP_Image/EP_Image_IE.png
Created plot: plots/EP_Image/EP_Image_IT.png
Created plot: plots/EP_Image/EP_Image_CY.png
Created plot: plots/EP_Image/EP_Image_LV.png
Created plot: plots/EP_Image/EP_Image_LT.png
Created plot: plots/EP_Image/EP_Image_LU.png
Created plot: plots/EP_Image/EP_Image_HU.