In [1]:
import requests
import PyPDF2
from io import BytesIO
import pandas as pd
import os
from dotenv import load_dotenv
from pathlib import Path
import json
import numpy as np
import matplotlib.pyplot as plt
import requests
import httpx

In [2]:
# Load the variables of the project-level .env file into the process environment.
dotenv_path = Path("../.env")
load_dotenv(dotenv_path=dotenv_path)

# API keys for the two providers; a key is None when its variable is not set.
gemini_api_key = os.environ.get("GEMINI_API_KEY4")
deepseek_api_key = os.environ.get("DEEPSEEK_API_KEY")

In [3]:
# google-genai SDK: `genai` provides the Client, `types` is used further down
# for GenerateContentConfig.
from google import genai
from google.genai import types

# NOTE(review): the DeepSeek cell below assigns the same name `client`; whichever
# of the two client cells ran last decides which API the generator functions use.
client = genai.Client(api_key=gemini_api_key)

In [130]:
from openai import OpenAI
# DeepSeek is called through the OpenAI client by pointing base_url at its endpoint.
# NOTE(review): this rebinds `client`, replacing the Gemini client created in an
# earlier cell. GeminiGenerateSol reads the same global, so re-run the Gemini
# client cell before generating with Gemini again.
client = OpenAI(api_key=deepseek_api_key, base_url="https://api.deepseek.com")

In [4]:
# Relative directory holding the cleaned problem-statement CSVs. File names are
# appended by plain string concatenation, so the trailing slash is required.
Statementdirectory = "../Statements/Cleaned/"
# Relative directory holding the prompt text files (same trailing-slash rule).
Promptdirectory = "./Prompts/"

## Load Data

In [5]:
# One DataFrame per cleaned Codeforces statement CSV (sets 4 down to 1).
CF4_DF = pd.read_csv(f"{Statementdirectory}CodeforcesStatement4.csv")
CF3_DF = pd.read_csv(f"{Statementdirectory}CodeforcesStatement3.csv")
CF2_DF = pd.read_csv(f"{Statementdirectory}CodeforcesStatement2.csv")
CF1_DF = pd.read_csv(f"{Statementdirectory}CodeforcesStatement1.csv")

In [6]:
# Stack statement sets 2, 3 and 4 into a single frame.
# NOTE(review): CF1_DF is loaded above but is not part of this concat — confirm
# that leaving it out is intentional.
Combined_CF = pd.concat([CF2_DF,CF3_DF,CF4_DF])

In [9]:
# Shuffle the problems once and renumber the rows 0..n-1.
# The seed is fixed because the generation loops further down pick rows by
# position (start_idx / end_idx): with an unseeded shuffle a kernel restart
# produces a different order, so a resumed run would cover different problems.
# Re-running only this cell shuffles the already shuffled frame again; use
# Restart & Run All to get the canonical order.
SHUFFLE_SEED = 42
Combined_CF = Combined_CF.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

In [10]:
# Show the shuffled problem set (pandas abbreviates the rendering of long frames).
Combined_CF

Unnamed: 0,problem_id,website,problem_statement
0,1321F,https://codeforces.com,"In this problem, we will deal with binary stri..."
1,559A,https://codeforces.com,Gerald got a very curious hexagon for his birt...
2,364C,https://codeforces.com,We'll call a set of positive integers a beauti...
3,266B,https://codeforces.com,"During the break the schoolchildren, boys and ..."
4,398B,https://codeforces.com,User ainta decided to paint a wall. The wall c...
...,...,...,...
18964,620B,https://codeforces.com,B. Grandfather Dovlet’s calculator\nOnce Max f...
18965,1077A,https://codeforces.com,A. Frog Jumping\nA frog is currently at the po...
18966,1031E,https://codeforces.com,You are given an array $$$a$$$ of length $$$n$...
18967,1446D2,https://codeforces.com,This is the hard version of the problem. The d...


## Load Prompt

In [11]:
# Read every prompt variant into its own module-level string.
# Path.read_text() opens the file in text mode with the default encoding,
# reads it completely and closes it again.
promptB0 = Path(Promptdirectory+'PromptB0.txt').read_text()
promptB1 = Path(Promptdirectory+'PromptB1.txt').read_text()
promptV1 = Path(Promptdirectory+'PromptV1.txt').read_text()
promptB2 = Path(Promptdirectory+'PromptB2.txt').read_text()
promptV1_5 = Path(Promptdirectory+'PromptV1.5.txt').read_text()
promptV2 = Path(Promptdirectory+'PromptV2.txt').read_text()
promptV3 = Path(Promptdirectory+'PromptV3.txt').read_text()
promptV4 = Path(Promptdirectory+'PromptV4.txt').read_text()
  

In [12]:
# Show the raw text of the V1.5 prompt.
promptV1_5

"You are a competitive programming expert. Your task is to answer competitive programming questions using the provided C++ code template. Ensure that you handle all edge cases diligently and do not add any comments in the code. Use small variable names throughout your solution. Follow the structure and conventions established in the template given, ensuring readability and clarity while adhering strictly to the format.\n\nGiven the question, provide a C++ solution for the problem described below, ensuring that you think thoroughly before rushing. Test your solution with all provided example test cases and edge cases, including those that may cause runtime or memory limit errors. Be wary of hidden test cases that might not follow the obvious patterns \nAnalyze the problem statement, identify key elements, and derive a solution step by step\n\n### Steps:\n1. **Read the problem statement carefully** to understand what is being asked.\n2. **Identify key constraints** and edge cases that ma

## Functions

In [12]:
# Empty results table; SolutionInsert appends one row per generated solution
# and rebinds this global to the extended frame.
SolutionDF = pd.DataFrame(columns=['writer', 'problem_id', 'submission_id', 'website', 'source'])

In [None]:
def pdfURL_to_text(url, timeout=60):
    """Download the PDF at ``url`` and return the text of all its pages.

    The downloaded bytes are also written to ./Temps/temp.pdf, and the
    extracted text is printed before it is returned.

    Args:
        url: Address of the PDF document.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of every page, concatenated in page order.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: The request failed or timed out.
    """
    res = requests.get(url, timeout=timeout)
    # Stop on HTTP errors instead of handing an error page to the PDF parser.
    res.raise_for_status()
    # The scratch directory is not guaranteed to exist on a fresh checkout.
    os.makedirs("./Temps", exist_ok=True)
    with open("./Temps/temp.pdf", "wb") as f:
        f.write(res.content)
    pdf_reader = PyPDF2.PdfReader(BytesIO(res.content))
    # Guard against a page yielding None so the join cannot fail on it.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    print(text)
    return text
    

In [13]:
def SolutionInsert(source,website,problem_id):
    """Append one AI-written solution to the global SolutionDF table.

    Args:
        source: Solution source code.
        website: Site the problem comes from.
        problem_id: Identifier of the problem on that site.

    Returns:
        The dict that was appended as a new row. 'writer' is always 'AI' and
        'submission_id' is NaN.
    """
    global SolutionDF
    record = dict(
        writer='AI',
        problem_id=problem_id,
        submission_id=np.nan,
        website=website,
        source=source,
    )
    print(record)
    # concat returns a new frame, so the global name is rebound to it.
    new_row_df = pd.DataFrame([record])
    SolutionDF = pd.concat([SolutionDF, new_row_df], ignore_index=True)
    return record

In [14]:
import re
# Captures the body of the first ```cpp fenced block in a model reply.
# DOTALL lets `.` match newlines so multi-line code is captured; the \s* on both
# sides of the group drops the whitespace next to the fences.
pattern = re.compile(r'```cpp\s*(.*?)\s*```', re.DOTALL)
# Manual check of the pattern on a one-line sample.
match = pattern.search(r'```cpp adsfasdf ```')

# Uncomment to print the result of the manual check:
# if match:
#     print("Matched:", match.group(1))  # Output: adsfasdf
# else:
#     print("No match found.")

In [132]:
# Change Config Here

# Default
# TEMPERATURE=1,
# TOP_P=0.95,

# Pxsit's Suggest
# Both settings are one-element tuples: the original assignments ended with a
# trailing comma, and the generator functions read them as Temperature[0] and
# Top_P[0]. The tuple form is kept so every existing reader keeps working.
Temperature = (0.8,)
Top_P = (0.85,)

def DeepseekGenerateSol(source_text,prompt_text=promptV1,model="deepseek-chat",temperature=None,top_p=None):
    """Ask a DeepSeek chat model for a solution and extract its C++ code.

    Uses the global `client` (must be the OpenAI-compatible DeepSeek client) and
    the global regex `pattern` that captures a ```cpp fenced block.

    Args:
        source_text: Problem statement, sent as the user message.
        prompt_text: Instructions, sent as the system message.
        model: Model name passed to the API.
        temperature: Sampling temperature; None uses the global Temperature[0].
        top_p: Nucleus sampling value; None uses the global Top_P[0].

    Returns:
        (code, response) when a ```cpp block was found in the reply, otherwise
        (None, exception). The function never raises for API or parsing errors;
        callers check the first element for None.
    """
    if temperature is None:
        temperature = Temperature[0]
    if top_p is None:
        top_p = Top_P[0]
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": prompt_text},
                {"role": "user", "content": source_text},
            ],
            temperature=temperature,
            top_p=top_p,
            stream=False,
        )
        # The content can be None; search an empty string in that case.
        found = pattern.search(response.choices[0].message.content or "")
    except Exception as e:
        return None, e
    if found is None:
        # Report the real cause instead of "'NoneType' object has no attribute 'group'".
        return None, ValueError("No ```cpp code block found in the model response")
    return found.group(1), response

In [18]:
# Sampling settings used as defaults by GeminiGenerateSol.
# Both are one-element tuples: the original assignments ended with a trailing
# comma, and the generator functions read them as Temperature[0] and Top_P[0].
# The tuple form is kept so every existing reader keeps working.
Temperature = (0.8,)
Top_P = (0.85,)

def GeminiGenerateSol(source_text,prompt_text=promptV1,model="gemini-2.0-flash",temperature=None,top_p=None):
    """Ask a Gemini model for a solution and extract its C++ code.

    Uses the global `client` (must be the google-genai client) and the global
    regex `pattern` that captures a ```cpp fenced block.

    Args:
        source_text: Problem statement; sent first in `contents`.
        prompt_text: Instructions; sent second in `contents`.
        model: Model name passed to the API.
        temperature: Sampling temperature; None uses the global Temperature[0].
        top_p: Nucleus sampling value; None uses the global Top_P[0].

    Returns:
        (code, response) when a ```cpp block was found in the reply, otherwise
        (None, exception). The function never raises for API or parsing errors;
        callers check the first element for None.
    """
    if temperature is None:
        temperature = Temperature[0]
    if top_p is None:
        top_p = Top_P[0]
    try:
        response = client.models.generate_content(
            model=model,
            contents=[source_text,prompt_text],
            config=types.GenerateContentConfig(
                temperature=temperature,
                top_p=top_p,
                response_mime_type="text/plain",
            ),
        )
        # The text can be None; search an empty string in that case.
        found = pattern.search(response.text or "")
    except Exception as e:
        return None, e
    if found is None:
        # Report the real cause instead of "'NoneType' object has no attribute 'group'".
        return None, ValueError("No ```cpp code block found in the model response")
    return found.group(1), response

## Gemini

In [19]:
# Generate Codeforces solutions with Gemini.
# GeminiGenerateSol reads the global `client`, so the google-genai client cell
# must be the last client cell that was run.
SolutionDF = pd.DataFrame(columns=['writer', 'problem_id', 'submission_id', 'website', 'source'])
from IPython.display import clear_output
import time
SAVE_PATH = './Generated_Solution/Pxsit_Suggested/Gemini_Codeforces_solV4-1.csv' #Change save PATH here
start_idx = 0
end_idx = min(1400,len(Combined_CF))
MAX_ATTEMPTS = 2          # calls per problem before it is skipped
RETRY_DELAY_SECONDS = 10  # pause between two attempts

try:
    # Combined_CF has a 0..n-1 index, so positions and index labels coincide.
    for index, row in Combined_CF.iloc[start_idx:end_idx].iterrows():
        ans = None
        for attempt in range(1, MAX_ATTEMPTS + 1):
            ans, res = GeminiGenerateSol(row['problem_statement'],promptV4) # Change Prompt Here
            if ans is not None:
                break
            error_message = str(res)
            if 'The document has no pages.' in error_message:
                # Retrying cannot help for a missing statement.
                print(f"Statement : {row['problem_id']} not found")
                break
            print(f"ERROR : {error_message}")
            # Checkpoint on every failure so an abort loses as little as possible.
            SolutionDF.to_csv(SAVE_PATH,index=False)
            if attempt < MAX_ATTEMPTS:
                print(f"Failed at index {index}, retrying in {RETRY_DELAY_SECONDS} seconds...")
                time.sleep(RETRY_DELAY_SECONDS)
        if ans is None:
            continue
        clear_output()
        print(f"{index} / {end_idx} ")
        print("\n", ans)
        SolutionInsert(ans, 'https://codeforces.com', row['problem_id'])
        if index%20 == 0:
            SolutionDF.to_csv(SAVE_PATH,index=False)
finally:
    # Runs on normal completion and on interruption (e.g. KeyboardInterrupt).
    SolutionDF.to_csv(SAVE_PATH,index=False)

1304 / 1400 

 #include <bits/stdc++.h>
using namespace std;

typedef long long ll;

int main() {
    int n;
    cin >> n;

    vector<int> a(n);
    for (int i = 0; i < n; ++i) {
        cin >> a[i];
    }

    int m;
    cin >> m;

    vector<int> b(m);
    for (int i = 0; i < m; ++i) {
        cin >> b[i];
    }

    ll vasya_comps = 0;
    ll petya_comps = 0;

    for (int i = 0; i < m; ++i) {
        int target = b[i];

        // Vasya's approach
        int vasya_count = 0;
        for (int j = 0; j < n; ++j) {
            vasya_count++;
            if (a[j] == target) {
                break;
            }
        }
        vasya_comps += vasya_count;

        // Petya's approach
        int petya_count = 0;
        for (int k = n - 1; k >= 0; --k) {
            petya_count++;
            if (a[k] == target) {
                break;
            }
        }
        petya_comps += petya_count;
    }

    cout << vasya_comps << " " << petya_comps << '\n';

    return 0;
}
{'writer

## Deepseek-Chat

In [133]:
# Generate Codeforces solutions with DeepSeek-Chat.
# DeepseekGenerateSol reads the global `client`, so the DeepSeek (OpenAI) client
# cell must be the last client cell that was run.
SolutionDF = pd.DataFrame(columns=['writer', 'problem_id', 'submission_id', 'website', 'source'])
from IPython.display import clear_output
import time
SAVE_PATH = './Generated_Solution/Pxsit_Suggested/DeepSeek-Chat_solB0-1.csv' #Change save PATH here
start_idx = 4500
end_idx = min(5500,len(Combined_CF))
MAX_ATTEMPTS = 2          # calls per problem before it is skipped
RETRY_DELAY_SECONDS = 10  # pause between two attempts

try:
    # Combined_CF has a 0..n-1 index, so positions and index labels coincide.
    for index, row in Combined_CF.iloc[start_idx:end_idx].iterrows():
        ans = None
        for attempt in range(1, MAX_ATTEMPTS + 1):
            ans, res = DeepseekGenerateSol(row['problem_statement'],promptB0) # Change Prompt Here
            if ans is not None:
                break
            error_message = str(res)
            if 'The document has no pages.' in error_message:
                # Retrying cannot help for a missing statement.
                print(f"Statement : {row['problem_id']} not found")
                break
            print(f"ERROR : {error_message}")
            # Checkpoint on every failure so an abort loses as little as possible.
            SolutionDF.to_csv(SAVE_PATH,index=False)
            if attempt < MAX_ATTEMPTS:
                print(f"Failed at index {index}, retrying in {RETRY_DELAY_SECONDS} seconds...")
                time.sleep(RETRY_DELAY_SECONDS)
        if ans is None:
            continue
        clear_output()
        print(f"{index} / {end_idx} ")
        print("\n", ans)
        SolutionInsert(ans, 'https://codeforces.com', row['problem_id'])
        if index%20 == 0:
            SolutionDF.to_csv(SAVE_PATH,index=False)
finally:
    # Runs on normal completion and on interruption (e.g. KeyboardInterrupt).
    SolutionDF.to_csv(SAVE_PATH,index=False)

4601 / 5500 

 #include <iostream>
#include <string>
#include <map>

using namespace std;

int main() {
    int n;
    string s;
    cin >> n >> s;
    
    map<string, int> freq;
    
    for (int i = 0; i < n - 1; ++i) {
        string twoGram = s.substr(i, 2);
        freq[twoGram]++;
    }
    
    string maxTwoGram;
    int maxCount = 0;
    
    for (auto& pair : freq) {
        if (pair.second > maxCount) {
            maxCount = pair.second;
            maxTwoGram = pair.first;
        }
    }
    
    cout << maxTwoGram << endl;
    
    return 0;
}
{'writer': 'AI', 'problem_id': '977B', 'submission_id': nan, 'website': 'https://codeforces.com', 'source': '#include <iostream>\n#include <string>\n#include <map>\n\nusing namespace std;\n\nint main() {\n    int n;\n    string s;\n    cin >> n >> s;\n    \n    map<string, int> freq;\n    \n    for (int i = 0; i < n - 1; ++i) {\n        string twoGram = s.substr(i, 2);\n        freq[twoGram]++;\n    }\n    \n    string maxTwoGram;\

KeyboardInterrupt: 