# Welcome to the challenge

Toy Trikz has supplied us with a dataset that includes the daily sales figures for various product categories. They have asked us to use Generative AI to produce a chart with five subplots, each showing a line chart of the quantity of items sold for one product category. A reference image of the desired chart can be found in the 'subplots.jpg' file located in the challenge task directory.

Follow the instructions in this notebook to complete the challenge.

Good Luck!🍀

Install the needed modules


Follow the google-gemini/generative-ai-python package (`google.generativeai`) to convert the code that currently uses the `openai` package.

Importing the needed modules and setting up the API

In [1]:
import pandas as pd
import numpy as np
import os
import openai
from dotenv import load_dotenv
import json
import re
import google.generativeai as genai

import traceback


In [2]:
# Pull variables from a local .env file (if one exists) into the process
# environment; without this call the imported load_dotenv is never used and a
# key stored only in .env is invisible to os.getenv.
load_dotenv()
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
# Hand the key to the Gemini client explicitly instead of relying on the
# library discovering it on its own. Skipped when no key is set so that this
# cell does not fail by itself.
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)
# print(GOOGLE_API_KEY)  # Just to verify it works; remove or hide this line for security.

Here are some helper functions for python code generation.

`Don't forget to run the block!`

In [3]:
def generate_chat_response(system_content, user_content):
    """Ask Gemini for a reply to ``user_content`` under ``system_content``.

    The system instructions are delivered as the first 'user' turn of a seeded
    chat history, followed by a fixed acknowledgement from the model, and the
    actual request is then sent as the next message of that chat.

    Args:
        system_content: Instruction text that frames how the model should answer.
        user_content: The request to send.

    Returns:
        The object returned by ``send_message``; callers in this notebook read
        its ``.text`` attribute.
    """
    # Cap on the reply length (equivalent to max_tokens in OpenAI).
    generation_config = genai.types.GenerationConfig(max_output_tokens=1200)

    # Seed conversation: instructions first, then a canned model acknowledgement.
    seeded_history = [
        {'role': 'user', 'parts': [system_content]},
        {'role': 'model', 'parts': ["Understood. I'll proceed as instructed."]},
    ]

    # Alternative model name kept for reference: 'gemini-pro'
    gemini = genai.GenerativeModel('gemini-1.5-flash')
    session = gemini.start_chat(history=seeded_history)

    return session.send_message(user_content, generation_config=generation_config)


# Text between the first pair of triple-backtick fences.
_CODE_FENCE_RE = re.compile(r'```(.*?)```', re.DOTALL)

# A python language tag sitting directly after the opening fence. The lookahead
# requires the tag to be the only thing on that line, so code that merely
# starts with "py..." (e.g. ```pyramid = 1```) is left alone.
_FENCE_LANG_TAG_RE = re.compile(r'\A[ \t]*(?:python3?|py)[ \t]*(?=\r?\n|\Z)',
                                re.IGNORECASE)


def extract_code(response_content):
    """Return the body of the first ``` fenced block in ``response_content``.

    Only the language tag that follows the opening fence (``python``,
    ``python3`` or ``py``) is removed. The word "python" anywhere else in the
    code -- string literals, comments, identifiers -- is preserved; blanket
    replacement used to corrupt such code.

    Args:
        response_content: The model's reply as a string.

    Returns:
        The text between the first pair of fences, without the language tag.
        Surrounding newlines are kept.

    Raises:
        IndexError: If the reply contains no fenced block. The exception type
            is the one a missing match has always produced; the message is now
            descriptive so it is useful when echoed back in a retry prompt.
    """
    found = _CODE_FENCE_RE.search(response_content)
    if found is None:
        raise IndexError("No code block wrapped in ``` was found in the response")

    return _FENCE_LANG_TAG_RE.sub("", found.group(1), count=1)



In [4]:
def generate_code_and_execute(user_content, max_retries=3, execute=True):
    """Ask the model for Python code, optionally run it, and retry on failure.

    Each attempt sends a prompt to ``generate_chat_response``, pulls the fenced
    code out of the reply with ``extract_code`` and, when ``execute`` is true,
    runs it with ``exec`` in this module's globals so that any function it
    defines becomes callable from later cells.

    On failure the error and traceback are printed and, if attempts remain, a
    repair prompt is built from the *original* request plus the failing code
    and the error. The original request is kept in its own variable so that
    repeated retries do not nest earlier retry prompts inside each other.

    Args:
        user_content: Natural-language description of the code to generate.
        max_retries: Maximum number of attempts.
        execute: When true, execute the generated code after extracting it.

    Returns:
        The generated code string of the first successful attempt, or ``None``
        if every attempt failed.
    """
    system_content = """
    You are a python code generator. You know pandas. You answer to every question with Python code.
    You return python code wrapped in ``` delimiter. Import any needed python module. And you don't provide any elaborations.
    """

    original_request = user_content  # never rewritten; every retry quotes it verbatim
    prompt = original_request

    for attempt in range(max_retries):
        code = None  # stays None if the failure happens before extraction
        try:
            # Generate chat response
            response = generate_chat_response(system_content, prompt)

            # Extract the code from the response content
            code = extract_code(response.text)

            if execute:
                # SECURITY NOTE: this runs model-generated code unsandboxed in
                # the notebook's global namespace. Only use with trusted prompts.
                exec(code, globals())

            return code  # Return the successful code

        except Exception as e:
            error_message = f"Error in attempt {attempt + 1}: {str(e)}\n{traceback.format_exc()}"
            print(error_message)

            if attempt < max_retries - 1:
                # Show the model what it produced so it can actually repair it.
                failed_code = code if code is not None else "(no code could be extracted from the response)"
                prompt = f"""
                The previous code generated had an error. Please provide a corrected version.
                Previous code:
                {failed_code}
                Error details:
                {error_message}
                Original request:
                {original_request}
                """
            else:
                print(f"Failed to generate valid code after {max_retries} attempts.")
                return None  # Or you could raise an exception here

    return None  # Reached only when max_retries allows no attempt at all

In [5]:
def update_code(code, user_content, max_retries=3, execute=True, verbose=True):
    """Ask the model to modify previously generated code, retrying on failure.

    The existing ``code`` is embedded in the system prompt and ``user_content``
    describes the change. Each attempt extracts the fenced code from the reply
    with ``extract_code`` and, when ``execute`` is true, runs it with ``exec``
    in this module's globals, replacing the earlier definition of the method.

    On failure a repair prompt is built from the *original* update request,
    the original code, the failing update and the error. The original request
    is kept in its own variable so that repeated retries do not nest earlier
    retry prompts inside each other.

    Args:
        code: Source of the method to update.
        user_content: Natural-language description of the requested change.
        max_retries: Maximum number of attempts.
        execute: When true, execute the updated code after extracting it.
        verbose: When true, print progress, errors and the final failure notice.

    Returns:
        The updated code string of the first successful attempt, or ``None``
        if every attempt failed.
    """
    system_content = f"""
    You are a python code generator. You know pandas. You are given the following python method: {code}. Update the code based on the user content. Do not change the method name.
    You return the updated python code wrapped in ``` delimiter. And you don't provide any elaborations.
    """

    original_request = user_content  # never rewritten; every retry quotes it verbatim
    prompt = original_request

    for attempt in range(max_retries):
        new_code = None  # stays None if the failure happens before extraction
        try:
            response = generate_chat_response(system_content, prompt)
            new_code = extract_code(response.text)

            if not new_code:
                raise ValueError("No code was generated")

            if execute:
                # SECURITY NOTE: this runs model-generated code unsandboxed in
                # the notebook's global namespace. Only use with trusted prompts.
                exec(new_code, globals())

            if verbose:
                print(f"Successfully updated and executed code on attempt {attempt + 1}")
            return new_code  # Return the successful code

        except Exception as e:
            error_message = f"Error in attempt {attempt + 1}: {str(e)}\n{traceback.format_exc()}"
            if verbose:
                print(error_message)

            if attempt < max_retries - 1:
                # Show the model its failing update so it can actually repair it.
                failed_update = new_code if new_code else "(no code could be extracted from the response)"
                prompt = f"""
                The previous code update had an error. Please provide a corrected version.
                Original code:
                {code}
                Failed update:
                {failed_update}
                Error details:
                {error_message}
                Update request:
                {original_request}
                """
            elif verbose:
                print(f"Failed to generate valid code after {max_retries} attempts.")

    return None  # This line will be reached if all attempts fail

Importing the dataset

In [6]:
# Load the challenge dataset from the working directory. The preview below
# shows the columns Date, Product_Category and Items_Sold.
data = pd.read_csv("challenge_dataset.csv")
# Last expression of the cell: displays the first rows as the cell output.
data.head()

Unnamed: 0,Date,Product_Category,Items_Sold
0,2017-01-01,Art & Crafts,81
1,2017-01-01,Electronics,204
2,2017-01-01,Games,549
3,2017-01-01,Sports & Outdoors,269
4,2017-01-01,Toys,382


In [7]:
data.dtypes

Date                object
Product_Category    object
Items_Sold           int64
dtype: object

Use the above methods to create the chart shown in the 'subplots.jpg' file in the challenge folder.
![alt text](subplots.png)

In [8]:
# Step 1: have the model generate an 'add_month_names' helper.

# Earlier wording of the prompt (dataframe in, dataframe out), kept for reference:
# user_content =  """
#                     Return a python method called 'add_month_names'. 
#                     That gets a dataframe with 'Date' column 
#                     And returns dataframe with 'Month_Name.' 
#                 """

# Current prompt: series of date strings in, series of month names out.
user_content =  """
                    Return a python method called 'add_month_names'. 
                    That gets a pandas series containing some dates with string data type. 
                    And returns a pandas series containing the month name of each date, don't use 'strftime'.  
                """
# Generates the code and (execute defaults to True) runs it via exec in the
# notebook globals, so add_month_names becomes callable in later cells.
# Returns None if all attempts fail.
code = generate_code_and_execute(user_content)

# Show the generated source.
print(code)


import pandas as pd

def add_month_names(dates):
  return pd.to_datetime(dates).dt.month_name()



```python
import pandas as pd

def add_month_names(series):
  # Convert the series to datetime format
  dates = pd.to_datetime(series)

  # Extract the month name from the datetime series
  month_names = dates.dt.month_name()

  # Return the month names as a pandas series
  return month_names
```

In [9]:
# add_month_names is not defined in this notebook's source; it exists because
# the generated code was exec'd into globals by generate_code_and_execute.
data['Month_Name'] = add_month_names(data.Date)
# Last expression of the cell: displays the dataframe with the new column.
data

Unnamed: 0,Date,Product_Category,Items_Sold,Month_Name
0,2017-01-01,Art & Crafts,81,January
1,2017-01-01,Electronics,204,January
2,2017-01-01,Games,549,January
3,2017-01-01,Sports & Outdoors,269,January
4,2017-01-01,Toys,382,January
...,...,...,...,...
3185,2018-09-30,Art & Crafts,897,September
3186,2018-09-30,Electronics,128,September
3187,2018-09-30,Games,321,September
3188,2018-09-30,Sports & Outdoors,616,September


In [10]:
# Ask the model to revise the generated add_month_names so it yields
# abbreviated month names; update_code exec's the result into globals,
# replacing the previous definition.
new_code = update_code(code, " make the month name its short cut.")

# Show the revised source (None if every attempt failed).
print(new_code)

Successfully updated and executed code on attempt 1

import pandas as pd

def add_month_names(dates):
  return pd.to_datetime(dates).dt.strftime('%b')



```python
import pandas as pd

def add_month_names(series):
  # Convert the series to datetime format
  dates = pd.to_datetime(series)

  # Extract the month name from the datetime series
  month_names = dates.dt.month_name()

  # Convert month names to their short forms
  month_names = month_names.str.slice(0, 3)

  # Return the month names as a pandas series
  return month_names
```

In [11]:
# Recompute the column with the redefined add_month_names; the output below
# shows abbreviated names (Jan, Sep, ...).
data['Month_Name'] = add_month_names(data.Date)
# Last expression of the cell: displays the updated dataframe.
data

Unnamed: 0,Date,Product_Category,Items_Sold,Month_Name
0,2017-01-01,Art & Crafts,81,Jan
1,2017-01-01,Electronics,204,Jan
2,2017-01-01,Games,549,Jan
3,2017-01-01,Sports & Outdoors,269,Jan
4,2017-01-01,Toys,382,Jan
...,...,...,...,...
3185,2018-09-30,Art & Crafts,897,Sep
3186,2018-09-30,Electronics,128,Sep
3187,2018-09-30,Games,321,Sep
3188,2018-09-30,Sports & Outdoors,616,Sep


In [25]:
# Step 2: have the model generate a 'subplots' function that draws one line
# chart per product category with Plotly Express.

user_content =  """
                    Return a Python method named 'subplots' that takes a dataframe and plots line chart subplots per 'Product_Category'. Each line chart displays the number of 'Items_Sold' per 'Date', put otwo line chart in each row. Utilize distinct colors for each product category and only display the first day of the month on the x-axis. use Plotly Express module for data visualization.
                """
# Generates the code and exec's it into globals so subplots() can be called
# from the following cells. Returns None if all attempts fail.
code = generate_code_and_execute(user_content)

# Show the generated source.
print(code)


import plotly.express as px
import pandas as pd

def subplots(df):
    fig = px.line(df, x="Date", y="Items_Sold", color="Product_Category",
                  facet_col="Product_Category", facet_col_wrap=2,
                  title="Items Sold by Product Category")
    fig.update_xaxes(tickformat="%d", tickvals=df["Date"][::30])
    fig.show()



```python
import plotly.express as px
import pandas as pd

def subplots(df):
    fig = px.line(df, x="Date", y="Items_Sold", color="Product_Category", facet_col="Product_Category", facet_col_wrap=2)
    fig.update_xaxes(tickformat="%d", tickmode="linear", tick0=0, dtick=30)
    fig.show()

```

In [26]:
# Render the chart using the model-generated subplots function (defined in
# globals by the exec inside generate_code_and_execute).
subplots(data)

In [37]:
# Update the chart if needed using the update_code method
# `code` still holds the source of the generated subplots function from the
# previous step; update_code exec's the revision into globals, replacing it.

new_code = update_code(code, "increase the height of the chart by 1000px, add x and y labels to each subplot.")

# Show the revised source (None if every attempt failed).
print(new_code)


Successfully updated and executed code on attempt 1

import plotly.express as px
import pandas as pd

def subplots(df):
    fig = px.line(df, x="Date", y="Items_Sold", color="Product_Category",
                  facet_col="Product_Category", facet_col_wrap=2,
                  title="Items Sold by Product Category")
    fig.update_xaxes(tickformat="%d", tickvals=df["Date"][::30])
    fig.update_yaxes(title_text="Items Sold")
    fig.update_xaxes(title_text="Date")
    fig.update_layout(height=1000)
    fig.show()



In [38]:
# Render the chart again, now with the revised subplots function that
# update_code placed in globals.
subplots(data)