<a href="https://colab.research.google.com/github/aeleraqi/DataVIZ_Tool/blob/main/DataVIZ_Tool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas
!pip install plotly
!pip install openpyxl
!pip install -U kaleido

Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 79.9/79.9 MB 10.3 MB/s eta 0:00:00
Installing collected packages: kaleido
Successfully installed kaleido-0.2.1


In [6]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Function to load dataset
def load_data():
    """Interactively load a CSV or Excel file into a DataFrame.

    The user is asked for a dataset type ('1' for CSV, '2' for Excel) and
    then for a file path. The menu choice is validated *before* the path
    is requested, and any failure while reading the file re-prompts from
    the start. Retrying is done with a loop rather than recursion, so
    repeated mistakes cannot exhaust the call stack.

    Returns:
        pandas.DataFrame: The contents of the file the user selected.
    """
    readers = {'1': pd.read_csv, '2': pd.read_excel}

    while True:
        print("Choose a dataset type:")
        print("1. Upload a CSV file")
        print("2. Upload an Excel file")
        choice = input("Enter 1 or 2: ").strip()

        if choice not in readers:
            print("Invalid choice. Please enter 1 or 2.")
            continue

        file_path = input("Enter the path to your file (e.g., /path/to/your/file.csv): ").strip()
        try:
            return readers[choice](file_path)
        except Exception as e:
            # Deliberately broad: a bad path, a parse error or a missing
            # Excel engine should all send the user back to the prompt.
            print(f"Error loading file: {e}. Please try again.")

# Function to classify and display variables
def classify_variables(data):
    """Label every column of *data* with a coarse statistical type.

    Rules, applied in order:
      * boolean dtype                       -> 'Categorical (Qualitative)'
      * integer dtype                       -> 'Discrete (Quantitative)'
      * any other numeric dtype             -> 'Continuous (Quantitative)'
      * string dtype, or < 20 unique values -> 'Categorical (Qualitative)'
      * everything else                     -> 'Other'

    Args:
        data: DataFrame whose columns should be classified.

    Returns:
        dict: Column name -> label, in the DataFrame's column order.
    """
    classification = {}
    for col in data.columns:
        series = data[col]
        if pd.api.types.is_bool_dtype(series):
            # pandas treats bool as a numeric dtype, so without this check
            # True/False flags would fall through to "Continuous".
            classification[col] = 'Categorical (Qualitative)'
        elif pd.api.types.is_numeric_dtype(series):
            if pd.api.types.is_integer_dtype(series):
                classification[col] = 'Discrete (Quantitative)'
            else:
                classification[col] = 'Continuous (Quantitative)'
        elif pd.api.types.is_string_dtype(series) or series.nunique() < 20:
            classification[col] = 'Categorical (Qualitative)'
        else:
            classification[col] = 'Other'
    return classification

# Function to display variables and their types in a formatted manner
def display_variable_classification(classification):
    """Print the classification as an indexed, column-aligned table.

    Args:
        classification: Mapping of variable name -> type label. Rows are
            numbered from 0 in the mapping's iteration order; those numbers
            are the indices the user later types to pick variables.
    """
    # Index column is 6 wide, name column 20 wide, type label unpadded.
    row_layout = "{:<6}{:<20}{}"

    print("\nVariable Classification:")
    print(row_layout.format('Index', 'Variable Name', 'Variable Type'))
    print("-" * 40)

    position = 0
    for name, kind in classification.items():
        print(row_layout.format(position, name, kind))
        position += 1

    print("\n")

# Function to handle user inputs and display available variables
def get_variables(data, chart_type):
    """Show the variable table and ask which columns to plot.

    Unlike a plain "filter out bad tokens" approach, every x-axis token
    must be a valid non-negative integer index; otherwise the whole prompt
    is repeated. This guarantees the returned list is never silently
    shorter than what the user typed (or empty).

    Args:
        data: DataFrame whose columns are offered for selection.
        chart_type: Menu key of the chart as a string ('1'..'13').

    Returns:
        tuple: ``(x_vars, y_var)`` where ``x_vars`` is a list of column
        names and ``y_var`` is a column name, or None for chart types that
        do not use a y-axis. For the correlation heatmap ('8'), which plots
        the whole frame, ``([], None)`` is returned without prompting.
    """
    classification = classify_variables(data)
    display_variable_classification(classification)

    columns = list(data.columns)
    # Chart types that never read y_var when plotted: histogram, pie,
    # heatmap, stacked bar and scatter matrix.
    no_y_axis_charts = ['4', '7', '8', '12', '13']

    if chart_type == '8':
        # The heatmap is computed from the whole frame; nothing to select.
        return [], None

    # The stacked bar chart uses a second x variable for colouring.
    min_x_vars = 2 if chart_type == '12' else 1

    while True:
        try:
            # Select x variables by number
            raw_x = input("Enter the index of the variable for the x-axis (comma separated): ")
            tokens = [token.strip() for token in raw_x.split(',') if token.strip()]

            if len(tokens) < min_x_vars:
                print(f"Please enter at least {min_x_vars} variable index(es) for the x-axis.")
                continue
            if not all(token.isdigit() for token in tokens):
                # Reject the whole entry instead of dropping bad tokens.
                raise ValueError("x-axis indices must be non-negative integers")

            x_vars = [columns[int(token)] for token in tokens]

            # If chart doesn't require a y-axis, skip y-axis selection
            if chart_type not in no_y_axis_charts:
                y_var_index = input("Enter the index of the variable for the y-axis: ")

                # Validate y-axis variable index
                if not y_var_index.strip().isdigit():
                    print("Invalid input. The y-axis variable must be an integer index.")
                    continue

                y_var = columns[int(y_var_index.strip())]
            else:
                y_var = None

            return x_vars, y_var
        except (IndexError, ValueError):
            print("Invalid input. Please enter valid indices for the variables.")

# Function to select theme
def select_theme():
    """Show the theme menu and return the chosen Plotly template name.

    Returns:
        str: The template name for the entered menu number. Any entry that
        is not exactly one of '1'..'6' falls back to 'plotly'.
    """
    menu_lines = (
        "\nChoose a theme for the chart:",
        "1. Plotly (default)",
        "2. ggplot2",
        "3. Seaborn",
        "4. Simple White",
        "5. Plotly Dark",
        "6. None (minimal)",
    )
    for line in menu_lines:
        print(line)

    template_by_key = {
        '1': 'plotly',
        '2': 'ggplot2',
        '3': 'seaborn',
        '4': 'simple_white',
        '5': 'plotly_dark',
        '6': 'none',
    }

    picked = input("Enter the number corresponding to the theme: ")
    if picked in template_by_key:
        return template_by_key[picked]
    return 'plotly'  # Unrecognised entries use the default theme

# Function to create plots based on user choice
def create_plot(data, chart_type, x_vars, y_var):
    """Build the Plotly Express figure for the requested chart type.

    Args:
        data: DataFrame holding the columns to plot.
        chart_type: Menu key of the chart as a string ('1'..'13').
        x_vars: List of selected column names. Most charts use only the
            first entry; the stacked bar uses the first two and the scatter
            matrix uses all of them. The heatmap ignores it.
        y_var: Column name for the y-axis, or None for charts without one.

    Returns:
        The created figure, or None if the chart type is not recognised or
        building the figure failed (a message is printed in both cases).
    """
    fig = None

    # Create the plot
    try:
        if chart_type == '1':  # Scatter plot
            fig = px.scatter(data, x=x_vars[0], y=y_var)
        elif chart_type == '2':  # Line plot
            fig = px.line(data, x=x_vars[0], y=y_var)
        elif chart_type == '3':  # Bar plot
            fig = px.bar(data, x=x_vars[0], y=y_var)
        elif chart_type == '4':  # Histogram
            fig = px.histogram(data, x=x_vars[0])
        elif chart_type == '5':  # Box plot
            fig = px.box(data, x=x_vars[0], y=y_var)
        elif chart_type == '6':  # Area plot
            fig = px.area(data, x=x_vars[0], y=y_var)
        elif chart_type == '7':  # Pie chart
            # Leave `values` unset so each sector is sized by how often its
            # label occurs. Passing value_counts().values supplied one entry
            # per category instead of one per row, which does not line up
            # with the `names` column.
            fig = px.pie(data, names=x_vars[0])
        elif chart_type == '8':  # Heatmap
            # Correlate numeric columns only; text columns cannot be
            # correlated and make corr() fail on recent pandas versions.
            fig = px.imshow(data.select_dtypes(include='number').corr())
        elif chart_type == '9':  # Violin plot
            fig = px.violin(data, y=y_var, x=x_vars[0])
        elif chart_type == '10':  # Bubble chart
            size_var_index = input("Enter the index of the second variable for bubble size: ")
            size_var = list(data.columns)[int(size_var_index.strip())]
            fig = px.scatter(data, x=x_vars[0], y=y_var, size=size_var)
        elif chart_type == '11':  # Density Heatmap
            fig = px.density_heatmap(data, x=x_vars[0], y=y_var)
        elif chart_type == '12':  # Stacked Bar Chart
            # Needs two x variables: the second one colours the stack.
            fig = px.bar(data, x=x_vars[0], color=x_vars[1], barmode='stack')
        elif chart_type == '13':  # Scatter Matrix
            fig = px.scatter_matrix(data, dimensions=x_vars)
        else:
            print(f"Unknown chart type: {chart_type}")

    except Exception as e:
        # Broad on purpose: bad indices, missing columns and Plotly
        # validation errors are all reported the same way and leave fig=None.
        print(f"Error creating plot: {e}")

    return fig

# Function to handle chart selection and generate plots
def generate_chart(data, theme):
    """Run the interactive chart menu until the user chooses to stop.

    Each round prints the chart menu, validates the selection, collects the
    variables, builds the figure, applies *theme* and shows it. If the
    figure could not be built, the round is abandoned and the menu is shown
    again instead of failing on a missing figure.

    Args:
        data: DataFrame to plot from.
        theme: Plotly template name applied to every figure.
    """
    valid_chart_types = {str(number) for number in range(1, 14)}

    while True:
        print("\nChoose a chart type:")
        print("1. Scatter plot (requires two numerical variables)")
        print("2. Line plot (requires two numerical variables)")
        print("3. Bar plot (requires one categorical and one numerical variable)")
        print("4. Histogram (requires one numerical variable)")
        print("5. Box plot (requires one categorical and one numerical variable)")
        print("6. Area plot (requires one numerical variable)")
        print("7. Pie chart (requires one categorical variable)")
        print("8. Heatmap (requires numerical data)")
        print("9. Violin plot (requires one categorical and one numerical variable)")
        print("10. Bubble chart (requires two numerical variables and one for size)")
        print("11. Density Heatmap (requires two numerical variables)")
        print("12. Stacked Bar Chart (requires two categorical variables)")
        print("13. Scatter Matrix (requires multiple numerical variables)")
        print("14. Exit")

        chart_type = input("Enter the number corresponding to the chart type: ").strip()

        if chart_type == '14':
            print("Exiting chart selection.")
            return

        # Reject unknown entries before asking the user to pick variables.
        if chart_type not in valid_chart_types:
            print("Invalid choice. Please enter a number between 1 and 14.")
            continue

        # Get x and y variables based on the chart type
        x_vars, y_var = get_variables(data, chart_type)

        # Create and display the plot
        fig = create_plot(data, chart_type, x_vars, y_var)
        if fig is None:
            # create_plot already printed the reason; go back to the menu.
            print("No chart was created. Please try again.")
            continue

        # Update the layout with the selected theme
        fig.update_layout(template=theme)
        fig.show()

        # Ask the user if they want to continue or exit
        exit_option = input("Would you like to create another chart? (y/n): ").strip().lower()
        if exit_option != 'y':
            break

# Main function to execute the script
def main():
    """Load a dataset, pick a theme, then hand both to the chart menu."""
    # Arguments are evaluated left to right, so the dataset prompts still
    # appear before the theme menu.
    generate_chart(load_data(), select_theme())


if __name__ == "__main__":
    main()


Choose a dataset type:
1. Upload a CSV file
2. Upload an Excel file
Enter 1 or 2: 1
Enter the path to your file (e.g., /path/to/your/file.csv): /content/Housing.csv

Choose a theme for the chart:
1. Plotly (default)
2. ggplot2
3. Seaborn
4. Simple White
5. Plotly Dark
6. None (minimal)
Enter the number corresponding to the theme: 4

Choose a chart type:
1. Scatter plot (requires two numerical variables)
2. Line plot (requires two numerical variables)
3. Bar plot (requires one categorical and one numerical variable)
4. Histogram (requires one numerical variable)
5. Box plot (requires one categorical and one numerical variable)
6. Area plot (requires one numerical variable)
7. Pie chart (requires one categorical variable)
8. Heatmap (requires numerical data)
9. Violin plot (requires one categorical and one numerical variable)
10. Bubble chart (requires two numerical variables and one for size)
11. Density Heatmap (requires two numerical variables)
12. Stacked Bar Chart (requires two cate

Would you like to create another chart? (y/n): y

Choose a chart type:
1. Scatter plot (requires two numerical variables)
2. Line plot (requires two numerical variables)
3. Bar plot (requires one categorical and one numerical variable)
4. Histogram (requires one numerical variable)
5. Box plot (requires one categorical and one numerical variable)
6. Area plot (requires one numerical variable)
7. Pie chart (requires one categorical variable)
8. Heatmap (requires numerical data)
9. Violin plot (requires one categorical and one numerical variable)
10. Bubble chart (requires two numerical variables and one for size)
11. Density Heatmap (requires two numerical variables)
12. Stacked Bar Chart (requires two categorical variables)
13. Scatter Matrix (requires multiple numerical variables)
14. Exit
Enter the number corresponding to the chart type: 4

Variable Classification:
Index Variable Name       Variable Type
----------------------------------------
0     price               Discrete (Qua

Would you like to create another chart? (y/n): y

Choose a chart type:
1. Scatter plot (requires two numerical variables)
2. Line plot (requires two numerical variables)
3. Bar plot (requires one categorical and one numerical variable)
4. Histogram (requires one numerical variable)
5. Box plot (requires one categorical and one numerical variable)
6. Area plot (requires one numerical variable)
7. Pie chart (requires one categorical variable)
8. Heatmap (requires numerical data)
9. Violin plot (requires one categorical and one numerical variable)
10. Bubble chart (requires two numerical variables and one for size)
11. Density Heatmap (requires two numerical variables)
12. Stacked Bar Chart (requires two categorical variables)
13. Scatter Matrix (requires multiple numerical variables)
14. Exit
Enter the number corresponding to the chart type: 5

Variable Classification:
Index Variable Name       Variable Type
----------------------------------------
0     price               Discrete (Qua

Would you like to create another chart? (y/n): n
