In [None]:
# Step 1: Install required libraries and configure the API key
!pip install -q -U google-generativeai pandas matplotlib seaborn

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m84.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m107.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.1 which is incompatible.
dask-cudf-cu12 25.2.2 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.
cudf-cu12 25.2.1 requires pandas<2.2.4dev0,>=2.0, but you have pandas 2.3.1 which is incompatible.[0m[31m
[0m

In [None]:

import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import google.generativeai as genai
from google.colab import userdata
from IPython.display import Image, display

# Read the Google API key from Colab Secrets and pass it to the Gemini client.
# Failures are reported with a printed message instead of a traceback.
try:
    api_key = userdata.get('GOOGLE_API_KEY')
    genai.configure(api_key=api_key)
    print("Google API Key configured successfully.")
except userdata.SecretNotFoundError:
    # The secret has not been added to this notebook.
    print(
        "ERROR: Colab Secret 'GOOGLE_API_KEY' not found.",
        "Please add your API key using the key icon in the left sidebar.",
        sep="\n",
    )
except Exception as config_error:
    # Any other failure while reading the secret or configuring the client.
    print(f"An error occurred: {config_error}")

Google API Key configured successfully.


In [None]:
# Step 2: Create necessary directories and Python modules

# Make sure the input and output folders exist; an existing folder is not an error.
for folder in ("uploads", "outputs"):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Create the tools.py file
%%writefile tools.py
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def execute_python_code(code: str, data_path: str, output_path: str) -> str:
    """
    Executes Python code generated by the LLM and verifies that it saved a chart.

    The code runs with the names 'pd', 'plt', 'sns', 'df' (the CSV at
    ``data_path`` loaded as a DataFrame) and 'output_path' predefined.

    Args:
        code: Python source to execute.
        data_path: Path of the CSV file that is loaded into 'df'.
        output_path: Path where the executed code is expected to save the chart.
            An existing file at this path is removed before execution.

    Returns:
        A success message if a file exists at ``output_path`` after execution,
        otherwise a message starting with
        "An error occurred during code execution:". Errors are also printed.
        No exception is propagated to the caller.
    """
    import os  # local import: tools.py does not import os at file level

    try:
        # Drop a chart left over from an earlier run so that a failed run can
        # never be mistaken for a successful one by checking the file on disk.
        if os.path.exists(output_path):
            os.remove(output_path)

        df = pd.read_csv(data_path)
        namespace = {
            'pd': pd,
            'plt': plt,
            'sns': sns,
            'df': df,
            'output_path': output_path
        }
        # A single namespace (globals == locals) is required so that functions,
        # lambdas and comprehensions defined by the generated code can still
        # resolve 'df', 'plt', etc.; with separate dicts they cannot.
        # SECURITY NOTE: exec() runs arbitrary LLM-generated code with the full
        # privileges of this process. Only use this in a disposable/sandboxed
        # environment.
        exec(code, namespace)

        # Running without an exception does not prove a chart was written
        # (e.g. the generated code may have rebound 'output_path').
        if not os.path.isfile(output_path):
            error_message = (
                "An error occurred during code execution: "
                f"no chart was saved to '{output_path}'."
            )
            final_path = namespace.get('output_path')
            if final_path != output_path:
                error_message += f" The code reassigned 'output_path' to {final_path!r}."
            print(error_message)
            return error_message

        return f"Chart successfully generated and saved as '{output_path}'."
    except Exception as e:
        # Make the error message more descriptive
        error_message = f"An error occurred during code execution: {str(e)}"
        print(error_message)
        return error_message
    finally:
        # Release figures the generated code left open (it may have failed
        # before reaching its own plt.close()).
        plt.close('all')


Writing tools.py


In [None]:
# Create the agent.py file
%%writefile agent.py
import google.generativeai as genai
import pandas as pd

def _extract_code(raw_text: str) -> str:
    """Returns only the Python source contained in an LLM reply.

    If the reply contains fenced blocks (``` with any or no language tag), the
    contents of those blocks are returned and surrounding prose is dropped.
    Otherwise stray fence markers are removed and the text is returned stripped.
    """
    # Local imports so this block does not depend on new file-level imports.
    import re
    import textwrap

    fenced_blocks = re.findall(r"```[^\n`]*\n(.*?)```", raw_text, flags=re.DOTALL)
    if fenced_blocks:
        # dedent: a uniformly indented block would otherwise fail to execute.
        cleaned = [textwrap.dedent(block).strip("\n") for block in fenced_blocks]
        return "\n".join(cleaned).strip()
    # No complete fenced block (e.g. unterminated fence): drop the markers only.
    return re.sub(r"```(?:(?:python|py)\b)?", "", raw_text).strip()


def generate_visualization_code(user_prompt: str, columns: list):
    """
    Generates visualization code based on the user prompt and data columns.

    Args:
        user_prompt: The user's visualization request in natural language.
        columns: Column names of the dataset, inserted into the prompt.

    Returns:
        The Python source returned by the model with markdown fences removed
        (may be an empty string), or None if the request to the model failed.
        Failures are printed, not raised.
    """
    model = genai.GenerativeModel('gemini-1.5-flash')
    prompt_template = f"""
    You are an expert data scientist who specializes in using pandas, matplotlib, and seaborn.
    Your task is to write Python code to generate a visualization based on a user request, using a pandas DataFrame named 'df'.

    RULES:
    - Only write Python code. Do not add any other explanations or markdown.
    - Your code should assume that a pandas DataFrame named 'df' is already loaded.
    - The columns of the dataset are: {columns}
    - The generated chart must be saved to the path specified by the 'output_path' variable. Use plt.savefig(output_path).
    - The variable 'output_path' is already defined. Never assign a new value to 'output_path' and never save the chart under any other file name.
    - After saving the chart, call plt.close() to close the figure. This prevents memory leaks.
    - Do not use 'import' statements in your code; the libraries are already provided.

    USER REQUEST:
    "{user_prompt}"

    PYTHON CODE:
    """
    try:
        response = model.generate_content(prompt_template)
        # Keep only the code: the model may wrap it in a fenced block and add
        # prose around it despite the rules above.
        return _extract_code(response.text)
    except Exception as e:
        print(f"An error occurred while getting a response from the LLM: {e}")
        return None

# Create a sample data file (state_sales.csv)
# NOTE(review): these statements are part of the `%%writefile agent.py` cell
# body, so they are stored in agent.py and run when `agent` is imported rather
# than when this cell is executed -- confirm that this is intended.
sample_rows = [
    ('California', 1200, 85),
    ('Texas', 1100, 92),
    ('Florida', 950, 75),
    ('New York', 800, 70),
    ('Pennsylvania', 750, 65),
    ('Illinois', 700, 68),
]
sample_data = pd.DataFrame(sample_rows, columns=['State', 'Sales', 'Customer_Count'])
sample_data.to_csv("uploads/state_sales.csv", index=False)

print("File environment created successfully: tools.py, agent.py, uploads/state_sales.csv")

Overwriting agent.py


In [None]:
# Step 3: Run the main application and display the result

import importlib
import sys

# `%%writefile` only rewrites agent.py / tools.py on disk. If the modules were
# already imported in this kernel, Python would keep using the old code, so
# reload them before importing the functions.
for _module_name in ("agent", "tools"):
    if _module_name in sys.modules:
        importlib.reload(sys.modules[_module_name])

# Import the modules we created in the previous cell
from agent import generate_visualization_code
from tools import execute_python_code

# --- Main Logic Start ---

# 1. Set File Paths
data_file = "uploads/state_sales.csv"
output_file = "outputs/visualization.png"

# 2. Load Data and Read Columns
# `columns` stays None when the data file is missing; the rest of the cell is
# then skipped. (Calling exit() in a notebook does not stop the cell at that
# point, so it cannot be used to abort here.)
columns = None
try:
    df = pd.read_csv(data_file)
except FileNotFoundError:
    print(f"ERROR: '{data_file}' not found.")
else:
    columns = df.columns.tolist()
    print(f"Data loaded successfully. Columns: {columns}")
    print("Sample Data:")
    display(df.head())

if columns is not None:
    # 3. Get Command from User (whitespace-only input counts as empty)
    user_prompt = input("\nPlease enter your visualization request (e.g., bar chart of the top 3 states by sales):\n> ").strip()

    if user_prompt:
        print("\n[Process] Requesting visualization code from the LLM...")

        # 4. Call the Agent to Generate Python Code
        generated_code = generate_visualization_code(user_prompt, columns)

        if generated_code:
            print("[Process] Code received from LLM:\n---")
            print(generated_code)
            print("---\n[Process] Executing code...")

            # Remove a chart left over from a previous run; otherwise the
            # existence check below could display an outdated image.
            if os.path.exists(output_file):
                os.remove(output_file)

            # 5. Use the Tool to Execute the Code
            result = execute_python_code(generated_code, data_file, output_file)
            print(f"\n[Result] {result}")

            # 6. Display the Generated Chart
            if os.path.exists(output_file):
                print("\nGenerated Chart:")
                display(Image(filename=output_file))
            else:
                print("\nThe chart file was not created. Please check the code generated by the LLM and any error messages.")
        else:
            print("ERROR: Code could not be generated.")
    else:
        print("No valid request was entered.")

Data loaded successfully. Columns: ['State', 'Sales', 'Customer_Count']
Sample Data:


Unnamed: 0,State,Sales,Customer_Count
0,California,1200,85
1,Texas,1100,92
2,Florida,950,75
3,New York,800,70
4,Pennsylvania,750,65



Please enter your visualization request (e.g., bar chart of the top 3 states by sales):
> en fazla satış yapan ilk 3 state'in bar chart çiz

[Process] Requesting visualization code from the LLM...
[Process] Code received from LLM:
---

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

top_3_states = df.groupby('State')['Sales'].sum().nlargest(3).index
top_3_data = df[df['State'].isin(top_3_states)]

plt.figure(figsize=(10, 6))
sns.barplot(x='State', y='Sales', data=top_3_data, estimator=sum)
plt.title('Top 3 States by Total Sales')
plt.xlabel('State')
plt.ylabel('Total Sales')
plt.xticks(rotation=45, ha='right')

output_path = 'top_3_states_sales.png'
plt.savefig(output_path)
plt.close()


---
[Process] Executing code...

[Result] Chart successfully generated and saved as 'outputs/visualization.png'.

The chart file was not created. Please check the code generated by the LLM and any error messages.
