# DATA AND CORRELATION - COLCAP AND STOCKS

### LIBRARIES

In [1]:
import os
import glob
import pandas as pd

In [None]:
# os.chdir("..")

### IDENTIFY THE NAME OF FOLDERS

In [2]:
# Get all folder names dynamically (excluding files)
folders = [f for f in os.listdir() if os.path.isdir(f)]
folders

['BCOLOMBIA',
 'BHI',
 'BOGOTA',
 'BVC',
 'CELSIA',
 'CEMARGOS',
 'CNEC',
 'COLCAP',
 'CONCONCRETO',
 'CORFICOL',
 'ECOPETROL',
 'ENKA',
 'ETB',
 'EXITO',
 'FABRICATO',
 'GEB',
 'GRUBOLIVAR',
 'GRUPOARGOS',
 'GRUPOAVAL',
 'GRUPOSURA',
 'GXTESCOL',
 'HCOLSEL',
 'ISA',
 'MINEROS',
 'NUTRESA',
 'PEI',
 'PFAVAL',
 'PFBCOLOMBIA',
 'PFCEMARGOS',
 'PFCORFICOL',
 'PFDAVVNDA',
 'PFGRUPARG',
 'PFGRUPOSURA',
 'PROMIGAS',
 'TERPEL']

### DEBUGGING THE FILES TO AVOID PROBLEMS

In [3]:
df_list = []

for folder in folders:
    os.chdir(folder)  
    all_files = glob.glob("*.csv")

    print(f"Processing folder: {folder} | Found CSVs: {all_files}")  # Debugging step

    if not all_files:  # If no CSVs found, skip folder
        print(f"No CSV files found in {folder}, skipping...")
        os.chdir("..")
        continue

    df_folder = pd.concat([pd.read_csv(f, sep=";", on_bad_lines='skip') for f in all_files], ignore_index=True)
    
    os.chdir("..")
    
df_folder

Processing folder: BCOLOMBIA | Found CSVs: ['BCOLOMBIA_20250306_095438.csv', 'BCOLOMBIA_20250306_095448.csv', 'BCOLOMBIA_20250306_095453.csv', 'BCOLOMBIA_20250306_095459.csv', 'BCOLOMBIA_20250306_095504.csv', 'BCOLOMBIA_20250306_095510.csv', 'BCOLOMBIA_20250306_095518.csv', 'BCOLOMBIA_20250306_095525.csv', 'BCOLOMBIA_20250306_095532.csv', 'BCOLOMBIA_20250306_095537.csv', 'BCOLOMBIA_20250316_114657.csv']
Processing folder: BHI | Found CSVs: ['BHI_20250311_075103.csv', 'BHI_20250311_075130.csv', 'BHI_20250311_075138.csv', 'BHI_20250311_075145.csv', 'BHI_20250311_075151.csv', 'BHI_20250311_075157.csv', 'BHI_20250316_115152.csv']
Processing folder: BOGOTA | Found CSVs: ['BOGOTA_20250311_084231.csv', 'BOGOTA_20250311_084240.csv', 'BOGOTA_20250311_084248.csv', 'BOGOTA_20250311_084255.csv', 'BOGOTA_20250311_084302.csv', 'BOGOTA_20250311_084308.csv', 'BOGOTA_20250311_084325.csv', 'BOGOTA_20250311_084331.csv', 'BOGOTA_20250311_084340.csv', 'BOGOTA_20250311_084347.csv', 'BOGOTA_20250316_115256.c

Unnamed: 0,Fecha,Nemot√©cnico,Precio cierre,Precio m√°ximo,Precio promedio ponderado,Precio m√≠nimo,Variaci√≥n absoluta,Variaci√≥n porcentual,Cantidad,Volumen
0,2024-09-06,TERPEL,9400.00,9500.00,9400.77,9340.00,-100.0,-1.05,1470713.00,13825828700.00
1,2024-09-09,TERPEL,9640.00,9640.00,9556.00,9500.00,240.0,2.55,26147.00,249860710.00
2,2024-09-10,TERPEL,9600.00,9650.00,9611.51,9500.00,-40.0,-0.41,22556.00,216797280.00
3,2024-09-11,TERPEL,9400.00,9500.00,9455.91,9400.00,-200.0,-2.08,38380.00,362917900.00
4,2024-09-12,TERPEL,9580.00,9580.00,9455.58,9400.00,180.0,1.91,15069.00,142486210.00
...,...,...,...,...,...,...,...,...,...,...
1212,2020-08-21,TERPEL,6950.00,6950.00,6836.30,6800.00,110.0,1.61,24920.00,170360640.00
1213,2020-08-24,TERPEL,6950.00,6950.00,6947.76,6940.00,0.0,0.00,23745.00,164974590.00
1214,2020-08-25,TERPEL,6980.00,6980.00,6968.22,6950.00,30.0,0.43,33088.00,230564300.00
1215,2020-08-26,TERPEL,7240.00,7350.00,7183.15,7040.00,260.0,3.72,122628.00,880855440.00


### MAIN PART OF THE CODE
#### Identify, read, concatenate the csv files and make setups

In [4]:
# List to store concatenated DataFrames for each folder

df_list = []

for folder in folders:
    os.chdir(folder)  # Change to the folder
    extension = 'csv'
    
    # Identify all CSV files in the folder
    all_files = [i for i in glob.glob(f'*.{extension}')]

    # Read and concatenate all CSVs in the current folder
    df_folder = pd.concat([pd.read_csv(f,sep=";") for f in all_files], ignore_index=True)

    # Ensure 'Fecha' column is in datetime format
    df_folder['Fecha'] = pd.to_datetime(df_folder['Fecha'], errors='coerce')
    
    # Rename columns by adding folder name as a prefix (except 'Date')
    df_folder = df_folder.rename(columns={col: f"{folder}_{col}" for col in df_folder.columns if col != "Fecha"})

    # Append the concatenated DataFrame to the list
    df_list.append(df_folder)

    # Go back to the main directory before processing the next folder
    os.chdir("..")

In [None]:
# # Ensure df_list is not empty before merging
# if not df_list:
#     raise ValueError("No data was loaded. Check if CSV files exist and contain valid data.")

# # Merge all DataFrames on 'Date', keeping all available dates
# df_final = df_list[0]  # Start with the first DataFrame

# for df in df_list[1:]:
#     df_final = pd.merge(df_final, df, on="Fecha", how="outer", suffixes=('', '_dup'))

# # Drop duplicate columns if merging added them (from stocks with same column names)
# df_final = df_final.loc[:, ~df_final.columns.duplicated()]

# # Sort by date
# df_final = df_final.sort_values(by="Fecha")

# # Now df_final is a single DataFrame that you can manipulate with pandas
# print(df_final.head())  # Show first few rows

# # Save the merged DataFrame
# df_final.to_csv("Final_Merged_Data2.csv", index=False, encoding='utf-8-sig')

# df_final


In [5]:
print(os.getcwd())

c:\Users\Admin\OneDrive - Universidad de La Salle\SEBASTIAN\Documentos\HV_HTML\BVC


#### Merge all the csv in one csv

In [6]:
# Merge all DataFrames on 'Date' (keeping only common dates)
df_final = df_list[0]
for df in df_list[1:]:
    df_final = pd.merge(df_final, df, on="Fecha", how="outer", suffixes=('', '_dup'))
    
# Sort by date
df_final = df_final.sort_values(by="Fecha")

# Identify numeric columns (excluding 'Date' and stock name columns)
numeric_cols = df_final.columns[df_final.dtypes == 'object']

# Apply string replacements to clean decimal format
df_final[numeric_cols] = df_final[numeric_cols].apply(lambda x: x.str.replace(',', '', regex=False))

# Identify numeric columns (excluding 'Date' and stock name columns)
numeric_cols = df_final.select_dtypes(include=['number']).columns

# Convert only numeric columns to float
df_final[numeric_cols] = df_final[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Save final merged DataFrame
df_final.to_csv("Final_Merged_Data.csv", index=False, encoding='utf-8-sig')

# Show the first few rows of the final DataFrame
print(df_final.head())

          Fecha BCOLOMBIA_Nemot√©cnico BCOLOMBIA_Precio cierre  \
1229 2020-03-02                   NaN                     NaN   
1230 2020-03-03                   NaN                     NaN   
1231 2020-03-04                   NaN                     NaN   
1232 2020-03-05                   NaN                     NaN   
1110 2020-03-06             BCOLOMBIA                36700.00   

     BCOLOMBIA_Precio m√°ximo BCOLOMBIA_Precio promedio ponderado  \
1229                     NaN                                 NaN   
1230                     NaN                                 NaN   
1231                     NaN                                 NaN   
1232                     NaN                                 NaN   
1110                37620.00                            36923.99   

     BCOLOMBIA_Precio m√≠nimo BCOLOMBIA_Variaci√≥n absoluta  \
1229                     NaN                          NaN   
1230                     NaN                          NaN   
1231         

In [6]:
df_final.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1233 entries, 1229 to 1228
Columns: 313 entries, Fecha to TERPEL_Volumen
dtypes: datetime64[ns](1), float64(71), object(241)
memory usage: 3.0+ MB


In [7]:
df = pd.read_csv("Final_Merged_Data.csv", sep=',', parse_dates=["Fecha"])
df.columns = df.columns.str.strip()
# df.columns = df.columns.str.strip().str.replace(" ", "_")  # Normalize column names
print(df.columns.tolist())

['Fecha', 'BCOLOMBIA_Nemot√©cnico', 'BCOLOMBIA_Precio cierre', 'BCOLOMBIA_Precio m√°ximo', 'BCOLOMBIA_Precio promedio ponderado', 'BCOLOMBIA_Precio m√≠nimo', 'BCOLOMBIA_Variaci√≥n absoluta', 'BCOLOMBIA_Variaci√≥n porcentual', 'BCOLOMBIA_Cantidad', 'BCOLOMBIA_Volumen', 'BHI_Nemot√©cnico', 'BHI_Precio cierre', 'BHI_Precio m√°ximo', 'BHI_Precio promedio ponderado', 'BHI_Precio m√≠nimo', 'BHI_Variaci√≥n absoluta', 'BHI_Variaci√≥n porcentual', 'BHI_Cantidad', 'BHI_Volumen', 'BOGOTA_Nemot√©cnico', 'BOGOTA_Precio cierre', 'BOGOTA_Precio m√°ximo', 'BOGOTA_Precio promedio ponderado', 'BOGOTA_Precio m√≠nimo', 'BOGOTA_Variaci√≥n absoluta', 'BOGOTA_Variaci√≥n porcentual', 'BOGOTA_Cantidad', 'BOGOTA_Volumen', 'BVC_Nemot√©cnico', 'BVC_Precio cierre', 'BVC_Precio m√°ximo', 'BVC_Precio promedio ponderado', 'BVC_Precio m√≠nimo', 'BVC_Variaci√≥n absoluta', 'BVC_Variaci√≥n porcentual', 'BVC_Cantidad', 'BVC_Volumen', 'CELSIA_Nemot√©cnico', 'CELSIA_Precio cierre', 'CELSIA_Precio m√°ximo', 'CELSIA_Precio pr

In [9]:
df_close

NameError: name 'df_close' is not defined

In [8]:
import pandas as pd

# Load the merged DataFrame
df = pd.read_csv("Final_Merged_Data.csv", sep=',', parse_dates=["Fecha"])

# Select only 'Close' price columns and 'COLCAP_Valor hoy'
close_cols = [col for col in df.columns if "cierre" in col.lower() or col == 'COLCAP_Valor hoy']
# Create a DataFrame with only 'Date', 'Close Price' columns, and 'COLCAP_Valor hoy'
df_close = df[["Fecha"] + close_cols].copy()

# Ensure 'Date' column is in datetime format
df_close["Fecha"] = pd.to_datetime(df_close["Fecha"])

# Create 'YearMonth' column for grouping
df_close["YearMonth"] = df_close["Fecha"].dt.to_period("M")

# Drop 'Date' and 'YearMonth' before correlation calculations
df_corr = df_close.drop(columns=["Fecha", "YearMonth"])

# Compute monthly correlation and keep it as a DataFrame
monthly_corr = df_close.groupby("YearMonth").corr()

# Handle missing data by forward-filling, then back-filling
df_close.fillna(method='ffill', inplace=True)  # Fill forward
df_close.fillna(method='bfill', inplace=True)  # Fill backward

# Select the specific stock for correlation analysis
target_stock = "COLCAP_Valor hoy"  # Replace with your chosen stock column name

# **1Ô∏è‚É£ Calculate Historical Correlation (Full Period)**
historical_corr = df_corr.corr()[target_stock].drop(target_stock, errors="ignore")
# **2Ô∏è‚É£ Calculate Monthly Correlation**
monthly_corr = monthly_corr.xs(target_stock, level=1, axis=0)

# Drop self-correlation if needed
monthly_corr = monthly_corr.drop(target_stock, axis=1, errors="ignore")

# Save results
historical_corr.to_csv("Historical_Correlation.csv")
monthly_corr.to_csv("Monthly_Correlation.csv")

# Display results
print("üìå Historical Correlation with", target_stock)
print(historical_corr)

print("\nüìå Monthly Correlation with", target_stock)
print(monthly_corr.tail(10))  # Show last 10 months


üìå Historical Correlation with COLCAP_Valor hoy
BCOLOMBIA_Precio cierre      0.705082
BHI_Precio cierre            0.780928
BOGOTA_Precio cierre         0.162733
BVC_Precio cierre            0.226516
CELSIA_Precio cierre         0.489855
CEMARGOS_Precio cierre       0.643139
CNEC_Precio cierre          -0.426040
CONCONCRETO_Precio cierre    0.360060
CORFICOL_Precio cierre       0.292170
ECOPETROL_Precio cierre      0.452651
ENKA_Precio cierre           0.273211
ETB_Precio cierre            0.050187
EXITO_Precio cierre          0.190283
FABRICATO_Precio cierre      0.256540
GEB_Precio cierre            0.602788
GRUBOLIVAR_Precio cierre     0.629054
GRUPOARGOS_Precio cierre     0.701101
GRUPOAVAL_Precio cierre      0.006655
GRUPOSURA_Precio cierre      0.248453
GXTESCOL_Precio cierre      -0.036718
HCOLSEL_Precio cierre        0.906751
ISA_Precio cierre            0.573038
MINEROS_Precio cierre        0.536067
NUTRESA_Precio cierre        0.274875
PEI_Precio cierre            0.642555


  df_close.fillna(method='ffill', inplace=True)  # Fill forward
  df_close.fillna(method='bfill', inplace=True)  # Fill backward


In [9]:
print("\nüìå Monthly Correlation with", target_stock)
print(monthly_corr.tail(10))  # Show last 10 months



üìå Monthly Correlation with COLCAP_Valor hoy
              Fecha  BCOLOMBIA_Precio cierre  BHI_Precio cierre  \
YearMonth                                                         
2024-06   -0.803547                 0.812968          -0.635920   
2024-07   -0.933455                -0.755698           0.881120   
2024-08    0.570673                 0.786307           0.836415   
2024-09   -0.502952                 0.601449          -0.055271   
2024-10    0.824427                 0.927518           0.342642   
2024-11    0.799381                 0.832900          -0.549955   
2024-12   -0.365355                 0.757821           0.167488   
2025-01    0.634973                 0.950209           0.198755   
2025-02    0.929579                 0.965638           0.858792   
2025-03    0.498211                 0.847708           0.470786   

           BOGOTA_Precio cierre  BVC_Precio cierre  CELSIA_Precio cierre  \
YearMonth                                                              

In [10]:
import plotly.express as px

# üîπ Convert 'YearMonth' to string (Fix Period object issue)
monthly_corr.index = monthly_corr.index.astype(str)

# üî• **Add Heatmap for Monthly Correlations**
fig = px.imshow(
    monthly_corr.T,  # Transpose to get stocks on y-axis
    labels=dict(x="Month", y="Stock", color="Correlation"),
    title=f"üìä Monthly Correlation Heatmap - {target_stock}",
    color_continuous_scale="RdBu_r",
    aspect="auto",
)

fig.update_layout(
    autosize=False,
    width=1000,
    height=600,
    xaxis=dict(tickangle=-45),
)

In [11]:
monthly_corr.index = monthly_corr.index.astype(str)
# üî• **Split into Two Groups**
stocks = list(monthly_corr.columns)
midpoint = len(stocks) // 2  # Divide into two equal parts

group1 = stocks[:midpoint]  # First half
group2 = stocks[midpoint:]  # Second half

# **Function to Create Heatmap**
def plot_heatmap(corr_data, group, title):
    fig = px.imshow(
        corr_data[group].T,  # Transpose for better view
        labels=dict(x="Month", y="Stock", color="Correlation"),
        title=title,
        color_continuous_scale="RdBu_r",
        aspect="auto",
    )

    fig.update_layout(
        autosize=False,
        width=1000,
        height=600,
        xaxis=dict(tickangle=-45),
    )

    fig.show()

# **Plot Heatmaps**
plot_heatmap(monthly_corr, group1, f"üìä Monthly Correlation Heatmap (Group 1) - {target_stock}")
plot_heatmap(monthly_corr, group2, f"üìä Monthly Correlation Heatmap (Group 2) - {target_stock}")


In [12]:
from datetime import datetime, timedelta

# Compute the date one year ago from today
one_year_ago = datetime.today() - timedelta(days=365)

# üîπ **Filter data from the last year**
df_close = df_close[df_close["Fecha"] >= one_year_ago]

# Create 'YearMonth' column for grouping
df_close["YearMonth"] = df_close["Fecha"].dt.to_period("M")

# Drop 'Fecha' and 'YearMonth' before correlation calculations
df_corr = df_close.drop(columns=["Fecha", "YearMonth"])

# Compute monthly correlation
monthly_corr = df_close.groupby("YearMonth").corr()

# Handle missing data (Forward-fill & Back-fill)
df_close.fillna(method='ffill', inplace=True)
df_close.fillna(method='bfill', inplace=True)

# Select target stock
target_stock = "COLCAP_Valor hoy"  # Replace with your chosen stock column name

# **1Ô∏è‚É£ Calculate Historical Correlation**
historical_corr = df_corr.corr()[target_stock].drop(target_stock, errors="ignore")

# **2Ô∏è‚É£ Calculate Monthly Correlation**
monthly_corr = monthly_corr.xs(target_stock, level=1, axis=0)
monthly_corr = monthly_corr.drop(target_stock, axis=1, errors="ignore")

# üîπ Convert 'YearMonth' to string (Fix Period object issue)
monthly_corr.index = monthly_corr.index.astype(str)

#delete the column "Fecha"
if "Fecha" in monthly_corr.columns:
    monthly_corr = monthly_corr.drop(columns=["Fecha"])

# üî• **Split into Two Groups**
stocks = list(monthly_corr.columns)
midpoint = len(stocks) // 2  # Divide into two equal parts

group1 = stocks[:midpoint]  # First half
group2 = stocks[midpoint:]  # Second half


# **Function to Create Heatmap**
def plot_heatmap(corr_data, group, title):
    fig = px.imshow(
        corr_data[group].T,  # Transpose for better view
        labels=dict(x="Month", y="Stock", color="Correlation"),
        title=title,
        color_continuous_scale="RdBu_r",
        aspect="auto",
    )

    fig.update_layout(
        autosize=False,
        width=1000,
        height=600,
        xaxis=dict(tickangle=-45),
        template="plotly_white",  # ‚úÖ Clean & professional look
    )

    fig.show()

# **Plot Heatmaps**
plot_heatmap(monthly_corr, group1, f"üìä Monthly Correlation Heatmap (Group 1) - {target_stock}")
plot_heatmap(monthly_corr, group2, f"üìä Monthly Correlation Heatmap (Group 2) - {target_stock}")


DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.


DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.



In [13]:
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

# Load the monthly correlation data
df = pd.read_csv("Monthly_Correlation.csv", index_col=0, parse_dates=True)
df.index = pd.to_datetime(df.index.astype(str))  # Ensure index is datetime

# Filter data from March 2024 onwards
df_filtered = df[df.index >= "2024-03"]

# Split columns into two groups
stocks = list(df_filtered.columns)
midpoint = len(stocks) // 2
group1 = stocks[:midpoint]
group2 = stocks[midpoint:]

def plot_heatmap(data, title):
    fig = px.imshow(
        data.T,  # Transpose for better layout
        labels=dict(x="Month", y="Stock", color="Correlation"),
        title=title,
        color_continuous_scale="RdBu_r",
        aspect="auto",
    )
    fig.update_layout(
        autosize=False,
        width=1000,
        height=600,
        xaxis=dict(tickangle=-45),
    )
    return fig

# Dash app setup
app = dash.Dash(__name__)
server = app.server

app.layout = html.Div([
    html.H1("üìä Monthly Correlation Heatmaps", style={'textAlign': 'center'}),
    
    dcc.Dropdown(
        id="group_selector",
        options=[
            {"label": "Group 1", "value": "group1"},
            {"label": "Group 2", "value": "group2"}
        ],
        value="group1",
        clearable=False,
        style={"width": "50%", "margin": "auto"}
    ),
    
    dcc.Graph(id="heatmap")
])

@app.callback(
    Output("heatmap", "figure"),
    Input("group_selector", "value")
)
def update_heatmap(selected_group):
    data = df_filtered[group1] if selected_group == "group1" else df_filtered[group2]
    title = f"üìä Monthly Correlation Heatmap ({selected_group.replace('group', 'Group ')})"
    return plot_heatmap(data, title)

if __name__ == "__main__":
    app.run(debug=True)


In [14]:
df_corr = pd.read_csv("Historical_Correlation.csv", index_col=0, header=None, names=["Stock", "Correlation"], skiprows=1)
df_corr=df_corr.reset_index()
df_corr = df_corr.sort_values("Correlation", ascending=False).reset_index(drop=True)
df_corr

Unnamed: 0,Stock,Correlation
0,HCOLSEL_Precio cierre,0.906751
1,PFBCOLOMBIA_Precio cierre,0.900616
2,PFGRUPOSURA_Precio cierre,0.789961
3,BHI_Precio cierre,0.780928
4,TERPEL_Precio cierre,0.743705
5,BCOLOMBIA_Precio cierre,0.705082
6,GRUPOARGOS_Precio cierre,0.701101
7,CEMARGOS_Precio cierre,0.643139
8,PEI_Precio cierre,0.642555
9,GRUBOLIVAR_Precio cierre,0.629054


In [15]:
import pandas as pd
import plotly.express as px

# Load the correlation CSV
df_corr = pd.read_csv("Historical_Correlation.csv", index_col=0, header=None, names=["Stock", "Correlation"],skiprows=1)

#Reset index to have the correct form of the dataframe
df_corr=df_corr.reset_index()

#turn to float the data
df_corr["Correlation"] = df_corr["Correlation"].astype(float)

#order the data according to the correlation
df_corr = df_corr.sort_values("Correlation", ascending=False).reset_index(drop=True)

# Optional: clean column names
df_corr["Stock"] = df_corr["Stock"].str.replace("_Precio cierre", "", regex=False)

# Create the bar chart
fig = px.bar(
    df_corr,
    x="Correlation",
    y="Stock",
    orientation='h',
    color="Correlation",
    color_continuous_scale='RdBu_r',
    range_color=[-1, 1],
    title="üìà Historical Correlation Stocks vs COLCAP Index Mar-2020 to Mar 2025",
    labels={"Correlation": "Correlation Coefficient", "Stock": "Stock"},
)

# Update layout for better appearance
fig.update_layout(
    yaxis=dict(autorange="reversed"),
    height=800,
    xaxis=dict(tickformat=".2f"),
)

# Show the chart (or use in Dash app)
fig.show()

In [18]:
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

#FIRST PART --> CREATION OF HEATMAP

# Load the monthly correlation data
df = pd.read_csv("Monthly_Correlation.csv", index_col=0, parse_dates=True)
df.index = pd.to_datetime(df.index.astype(str))  # Ensure index is datetime

# Filter data from March 2024 onwards
df_filtered = df[df.index >= "2024-03"]

#delete the column "Fecha"
if "Fecha" in df_filtered.columns:
    df_filtered = df_filtered.drop(columns=["Fecha"])

# Split columns into two groups
stocks = list(df_filtered.columns)
midpoint = len(stocks) // 2
group1 = stocks[:midpoint]
group2 = stocks[midpoint:]

def plot_heatmap(data, title):
    fig = px.imshow(
        data.T,  # Transpose for better layout
        labels=dict(x="Month", y="Stock", color="Correlation"),
        title=title,
        color_continuous_scale="RdBu_r",
        aspect="auto",
    )
    fig.update_layout(
        autosize=False,
        width=1000,
        height=600,
        xaxis=dict(tickangle=-45),
    )
    return fig

# SECOND PART --> CREATION OF THE CORRELATION BARCHART

# Load the correlation CSV
df_corr = pd.read_csv("Historical_Correlation.csv", index_col=0, header=None, names=["Stock", "Correlation"],skiprows=1)

#Reset index to have the correct form of the dataframe
df_corr=df_corr.reset_index()

#turn to float the data
df_corr["Correlation"] = df_corr["Correlation"].astype(float)

#order the data according to the correlation
df_corr = df_corr.sort_values("Correlation", ascending=False).reset_index(drop=True)

# Optional: clean column names
df_corr["Stock"] = df_corr["Stock"].str.replace("_Precio cierre", "", regex=False)

def create_bar_chart():
    fig = px.bar(
        df_corr,
        x="Correlation",
        y="Stock",
        orientation="h",
        title="üìä Historical Correlation with COLCAP since 2020",
        color="Correlation",
        color_continuous_scale="RdBu_r",
        range_color=[-1, 1],
        labels={"Correlation": "Correlation Coefficient", "Stock": "Stock"}
    )
    fig.update_layout(
        yaxis=dict(autorange="reversed"),
        height=800,
        xaxis=dict(tickformat=".2f"))
    return fig


# Dash app setup
app = dash.Dash(__name__)
server = app.server

app.layout = html.Div([
    html.H1("üìà Correlation Visualizations", style={'textAlign': 'center'}),
    
    dcc.Dropdown(
        id="group_selector",
        options=[
            {"label": "Group 1", "value": "group1"},
            {"label": "Group 2", "value": "group2"}
        ],
        value="group1",
        clearable=False,
        style={"width": "50%", "margin": "auto"}
    ),
    
    html.Div(
        dcc.Graph(id="heatmap"),
        style={"display": "flex", "justifyContent": "center"}
    ),
    
    dcc.Tab(label="Historical Correlation Bar Chart", children=[
            html.Br(),
            dcc.Graph(figure=create_bar_chart(), style={"margin": "0 auto", "width": "90%"})
        ])
    
])

@app.callback(
    Output("heatmap", "figure"),
    Input("group_selector", "value")
)
def update_heatmap(selected_group):
    data = df_filtered[group1] if selected_group == "group1" else df_filtered[group2]
    title = f"üìä Monthly Correlation Heatmap ({selected_group.replace('group', 'Group ')})"
    return plot_heatmap(data, title)

# Run the server locally
if __name__ == "__main__":
    app.run(debug=True)