In [None]:
# https://medium.com/@abonia/running-ollama-in-google-colab-free-tier-545609258453
!pip install colab-xterm
%load_ext colabxterm

Collecting colab-xterm
  Downloading colab_xterm-0.2.0-py3-none-any.whl.metadata (1.2 kB)
Downloading colab_xterm-0.2.0-py3-none-any.whl (115 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.6/115.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: colab-xterm
Successfully installed colab-xterm-0.2.0


In [None]:
# Open an interactive terminal inside the notebook (magic from the colabxterm
# extension loaded in the setup cell).
%xterm

# Copy and paste the commands below into the terminal:
# curl https://ollama.ai/install.sh | sh
# ./ollama serve &
#
# ./ollama pull gemma3
#
# NOTE(review): the later cells invoke plain `ollama` (found via PATH) rather than
# `./ollama`; presumably the install script puts the binary on PATH, in which
# case the `./` prefix above should be dropped — confirm.

In [None]:
# Start the Ollama server as a background shell job (`&`). The `ollama` binary
# must already be installed and on PATH (see the terminal cell above);
# otherwise bash reports "command not found".
!ollama serve &

/bin/bash: line 1: ollama: command not found


In [None]:
# Download the gemma3 model (the default model name used by
# StoryPointsEstimationModel). This needs a running Ollama server to connect to.
!ollama pull gemma3

Error: could not connect to ollama app, is it running?


In [None]:
# Sanity check: ask the Ollama server for its model list. Fails with
# "could not connect" when the server is not running.
!ollama list

Error: could not connect to ollama app, is it running?


In [None]:
import pandas as pd

# Fetch the deep-se CSV straight from the Hugging Face Hub.
# If access is refused, authenticate first (e.g. `huggingface-cli login`).
deep_se_hub_uri = "hf://datasets/giseldo/deep-se/deep-se.csv"
df = pd.read_csv(deep_se_hub_uri)

# Keep a local copy: the estimation cell further down reads "deep-se.csv" from disk.
df.to_csv(path_or_buf="deep-se.csv", index=False)

In [None]:
!pip install ollama

Collecting ollama
  Downloading ollama-0.4.8-py3-none-any.whl.metadata (4.7 kB)
Downloading ollama-0.4.8-py3-none-any.whl (13 kB)
Installing collected packages: ollama
Successfully installed ollama-0.4.8


In [None]:
import re
from typing import Dict, List
from uuid import uuid4

import ollama
import pandas as pd

class StoryPointsEstimationModel:
    """Zero-shot story point estimator backed by a model served through Ollama.

    Each user story is rendered into a (Portuguese) prompt, sent to the model
    with ``ollama.generate`` and the textual reply is parsed into an integer.
    """

    def __init__(self, model_name: str = "gemma3"):
        """Store the name of the Ollama model to query.

        Args:
            model_name: Model name passed as ``model`` to ``ollama.generate``.
        """
        self.model_name = model_name

    def preprocess_data(self, user_story_data: Dict) -> str:
        """Build the structured LLM prompt for one user story.

        Args:
            user_story_data: Mapping that must contain the keys
                ``"user_story_text"`` and ``"description"``.

        Returns:
            The full prompt text.

        Raises:
            KeyError: If either required key is missing.
        """
        # The "return only an integer" instruction is repeated several times in
        # the original prompt (presumably for emphasis); the text is kept
        # unchanged so model behaviour is not altered.
        prompt = (
            "Você é um especialista em estimativa de esforço para projetos ágeis. "
            "Com base no texto da user story e sua descrição, estime os story points necessários para completar a tarefa. "
            "Considere fatores como complexidade, volume de trabalho e riscos implícitos. "
            "Retorne apenas um número inteiro representando os story points (ex.: 1, 2, 3, 5, 8, 13, etc.).\n\n"
            f"Texto da user story: {user_story_data['user_story_text']}\n"
            f"Descrição da user story: {user_story_data['description']}\n"
            "Retorne sempre apenas um número inteiro representando os story points (ex.: 1, 2, 3, 5, 8, 13, etc.).\n\n"
            "Não retorne nenhum texto além do número inteiro.\n\n"
            "Não retorne nenhum texto além do número inteiro.\n\n"
        )
        return prompt

    @staticmethod
    def _parse_story_points(raw_reply: str) -> int:
        """Extract an integer estimate from the model's textual reply.

        A reply that is exactly an integer (surrounding whitespace allowed) is
        converted directly. Otherwise the first run of digits is used, so
        replies such as ``"5."``, ``"**5**"`` or ``"Story points: 5"`` still
        yield 5 instead of being discarded.

        Raises:
            ValueError: If the reply contains no digits at all.
        """
        text = raw_reply.strip()
        try:
            return int(text)
        except ValueError:
            pass  # not a bare integer; fall back to searching inside the text
        match = re.search(r"\d+", text)
        if match is None:
            raise ValueError(f"no integer found in model reply: {text!r}")
        return int(match.group())

    def call_llm(self, prompt: str) -> int:
        """Query the Ollama model and return its story point estimate.

        Args:
            prompt: Prompt text, normally produced by ``preprocess_data``.

        Returns:
            The parsed estimate, or ``-1`` when no integer could be extracted
            from the reply (an error message is printed in that case).

        Raises:
            RuntimeError: If the call to Ollama fails for any other reason;
                the original exception is chained as ``__cause__``.
        """
        try:
            response = ollama.generate(
                model=self.model_name,
                prompt=prompt,
                options={
                    "temperature": 0.3,
                    # Only a short number is expected, so cap the reply length.
                    "num_predict": 10,
                },
            )
            # A missing "response" field keeps the original default of "0".
            return self._parse_story_points(response.get("response", "0"))
        except ValueError as e:
            # Best effort: an unparseable reply is recorded as -1 rather than
            # aborting the whole batch.
            print(f"Erro ao converter a resposta do Ollama para número inteiro: {str(e)}")
            return -1
        except Exception as e:
            raise RuntimeError(f"Erro ao chamar o modelo Ollama: {str(e)}") from e

    def estimate_story_points(self, user_story_data: Dict) -> int:
        """Estimate the story points of a single user story.

        Args:
            user_story_data: Mapping accepted by ``preprocess_data``.

        Returns:
            The value returned by ``call_llm`` for the generated prompt.
        """
        prompt = self.preprocess_data(user_story_data)
        story_points = self.call_llm(prompt)
        return story_points

    def batch_estimate(self, user_stories: List[Dict]) -> pd.DataFrame:
        """Estimate story points for several user stories.

        Args:
            user_stories: Story mappings accepted by ``preprocess_data``. The
                optional keys ``"story_id"`` and ``"real_story_points"`` are
                copied into the result.

        Returns:
            A DataFrame with one row per story and the columns ``story_id``,
            ``estimated_story_points`` and ``real_story_points``. Stories
            without a ``story_id`` get a random UUID string.
        """
        results = []
        for story in user_stories:
            story_points = self.estimate_story_points(story)
            results.append({
                "story_id": story.get("story_id", str(uuid4())),
                "estimated_story_points": story_points,
                "real_story_points": story.get("real_story_points", None)
            })
        return pd.DataFrame(results)

def generate_llm_zero_shot_model(project_name, model_name="gemma3", dataset_path="deep-se.csv"):
    """Run the zero-shot LLM estimator on one project's held-out stories.

    Reads the dataset CSV, keeps the rows of ``project_name`` that have a
    non-zero story point value and a non-null title and description, and
    estimates story points for the last 30% of those rows (in file order).
    The estimates are written to ``result_llm_zero_shot_<project_name>.csv``
    in the current working directory.

    Args:
        project_name: Value of the ``project`` column to select.
        model_name: Ollama model name handed to ``StoryPointsEstimationModel``.
        dataset_path: Path of the CSV file to read.

    Returns:
        The DataFrame of estimates that was written to disk.
    """
    model = StoryPointsEstimationModel(model_name=model_name)

    df = pd.read_csv(dataset_path)
    df_project = df[df["project"] == project_name]

    df_project = df_project[df_project["storypoint"] != 0]
    df_project = df_project.dropna(subset=["storypoint", "title", "description"])

    # Positional 70/30 split: the first 70% of rows are the training part.
    split_idx = int(len(df_project) * 0.7)

    # The model is zero-shot, so the training part is only reported, never
    # used; only the test part is sent to the LLM.
    df_train = df_project.iloc[:split_idx]
    df_test = df_project.iloc[split_idx:]

    print(f"Training set size: {len(df_train)}")
    print(f"Test set size: {len(df_test)}")

    # One plain dict per test row, in the shape batch_estimate() expects.
    user_stories = [
        {
            "story_id": str(record.get("issuekey", uuid4())),
            "user_story_text": record.get("title", ""),
            "description": record.get("description", ""),
            "real_story_points": record.get("storypoint", None),
        }
        for record in df_test.to_dict("records")
    ]

    result_df = model.batch_estimate(user_stories)

    result_df.to_csv(f"result_llm_zero_shot_{project_name}.csv", index=False)
    return result_df

if __name__ == "__main__":
    # Projects to evaluate, in run order. Each run writes its own
    # result_llm_zero_shot_<project>.csv. Every project is listed exactly once:
    # a repeated entry would redo a long LLM run and overwrite the same file.
    project_names = (
        "jirasoftware",
        "clover",
        "usergrid",
        "bamboo",
        "duracloud",
        "mulestudio",
        "aptanastudio",
        "talendesb",
        "mule",
        "moodle",
        "talenddataquality",
        "mesos",
        "titanium",
        "appceleratorstudio",
        "springxd",
        "datamanagement",
    )

    for project_name in project_names:
        generate_llm_zero_shot_model(project_name)


Training set size: 200
Test set size: 86
Training set size: 252
Test set size: 109


KeyboardInterrupt: 