In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from ydata_profiling import ProfileReport
import re
import contextlib
import io
import matplotlib.pyplot as plt
import pandas as pd
import json

  from .autonotebook import tqdm as notebook_tqdm


In [None]:

df = pd.read_csv("gastos_fake.csv")
profile = ProfileReport(df, title="Relat√≥rio Financeiro", explorative=True)
profile_json = profile.to_json()
json_data = json.loads(profile_json)

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [00:00<00:00, 12.74it/s]0<00:00,  8.00it/s, Describe variable: Valor]    
Summarize dataset: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 14/14 [00:01<00:00, 12.69it/s, Completed]                
Render JSON: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 55.86it/s]


In [3]:
model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

In [4]:
def gerar_codigo_local(prompt: str, max_new_tokens=2000) -> str:
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    output_ids = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7
    )
    resposta = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return resposta

In [None]:
def executar_codigo_ia(codigo: str, df: pd.DataFrame):
    stdout = io.StringIO()
    try:
        with contextlib.redirect_stdout(stdout):
            exec(codigo, {"pd": pd, "plt": plt, "df": df})
    except Exception as e:
        print("‚ùå Erro ao executar:", e)
    print(stdout.getvalue())

In [6]:
prompt = """
You are a Python coding assistant.

You are given a pandas DataFrame called `df` already loaded in the environment. Do NOT create or load any new DataFrames.

Write Python code that:
1. Groups expenses by category and sums the values
2. Plots a pie chart and a bar chart using matplotlib
3. Saves the figures using plt.savefig("...") instead of showing them
4. Includes brief comments explaining each part

Return only the valid Python code (no explanations, no markdown).
"""


In [None]:
def extrair_codigo_puro(resposta_llm: str) -> str:
    if "```python" in resposta_llm:
        codigo = re.findall(r"```python(.*?)```", resposta_llm, re.DOTALL)
        return codigo[0].strip() if codigo else resposta_llm.strip()


In [None]:
codigo_gerado = gerar_codigo_local(prompt)
codigo_puro = extrair_codigo_puro(codigo_gerado)
print("Code generated by the DeepSeek model:\n", codigo_gerado)
executar_codigo_ia(codigo_gerado, df)

üîß C√≥digo gerado pela IA:
 
```python
import matplotlib.pyplot as plt

# Step 1: Group expenses by category and sums the values
expenses_by_category = df.groupby('Category')['Expense'].sum()

# Step 2: Plot a pie chart
fig1, ax1 = plt.subplots()
expenses_by_category.plot(kind='pie', ax=ax1, autopct='%1.1f%%')
ax1.set_title('Expenses by Category')
plt.savefig("pie_chart.png")

# Step 3: Plot a bar chart
fig2, ax2 = plt.subplots()
expenses_by_category.plot(kind='bar', ax=ax2)
ax2.set_title('Expenses by Category')
plt.savefig("bar_chart.png")
```

Note: Assumptions made:
1. You have a column 'Category' in your DataFrame.
2. You have a column 'Expense' in your DataFrame.
3. Your DataFrame is named 'df'.
4. It is assumed that 'Expenses' is a numerical value. If it's not the case, you should use the correct expression for summing values.
5. Pie chart and Bar chart are plotted for visualization, based on pandas' plot function, which is used to create plots of data.
6. The pie and bar chart