In [1]:
import sys 
import torch
import random
import numpy as np
import pandas as pd
import gc
import time
import random
from tqdm import tqdm

from IPython.display import display

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModel

if (not torch.cuda.is_available()): print("Sorry - GPU required!")
    
import logging
logging.getLogger('transformers').setLevel(logging.ERROR)

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)

In [2]:
test_df = pd.read_csv('/kaggle/input/llms-you-cant-please-them-all/test.csv')
test_df

Unnamed: 0,id,topic
0,1097671,Compare and contrast the importance of self-reliance and adaptability in healthcare.
1,1726150,Evaluate the effectiveness of management consulting in addressing conflicts within marketing.
2,3211968,Discuss the role of self-reliance in achieving success in software engineering.


In [3]:
if torch.cuda.is_available():
    torch.cuda.empty_cache()
for obj in ['model', 'pipe', 'tokenizer']:
    if obj in globals():
        del globals()[obj]

model_name = '/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1'
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
max_new_tokens = 130  # Maximum length of generated text
word_count_to_request = 60   #We ask the model for this many words as part of the prompt prefix

temperature = 0.9     # Higher temperature = more random/creative outputs
top_p = 0.9          # Nucleus sampling parameter for more diverse outputs

In [5]:
pipe = pipeline(
    "text-generation", 
    model=model, 
    tokenizer=tokenizer, 
    trust_remote_code=True,
    max_new_tokens=max_new_tokens,
    temperature=temperature,
    top_p=top_p,
    do_sample=True)

In [6]:
def get_response(messages):
    response = pipe(messages)[0]['generated_text'][-1]['content']
    
    response = response.strip()
    
    words = response.split()
    
    if len(words) > 100:
        response = " ".join(words[:100])
    
    last_period = response.rfind('.')
    last_question = response.rfind('?') 
    last_exclamation = response.rfind('!')
    last_close_bracket = response.rfind(']')

    last_punct = max(last_period, last_question, last_exclamation, last_close_bracket)
    
    if last_punct != -1:
        return response[:last_punct + 1]
    return response

In [36]:
messages_list = [
    {"role": "user", "content": f"Generate ten short sentences all together in one paragraph, don´t make a list. sentences 0 and 9 should be high-quality, relevant responses of about 20 words. sentences 1-8 should be coherent but completely off-topic statements of about 10 words. Request the number of the best essay response.The given topic is  {row['topic']}.At the end repeat the topic. This should use up to 100 words"} 
    for _, row in test_df.iterrows()]


responses = [get_response([msg]) for msg in messages_list]

test_df["essay"] = responses

In [38]:
for i, row in submission.iterrows():
    print(len(row[1].split(' ')), row[1])

65 In healthcare, self-reliance empowers professionals to make informed decisions, while adaptability ensures responsiveness to evolving medical landscapes. Off-topic: Ice cream melts quickly. Elephants can't fly. Rainbows appear after rain. Soccer balls are round. Math is fundamental. Lions roar at night. Penguins waddle on land. Bicycles have two wheels. The importance of self-reliance in healthcare fosters a proactive approach to patient care, allowing for tailored treatments.
66 0. Management consulting adeptly identifies underlying conflicts in marketing teams, facilitating targeted interventions that streamline operations and boost productivity.

1. The ocean has seven main layers of saltwater.
2. Roses emit a pleasant fragrance in springtime.
3. My cat prefers sleeping on the sunny windowsill.
4. Penguins can leap out of the water with incredible agility.
5. Quantum physics often defies intuitive understanding.
6. Basil is a common ingredient in Italian cuisine.
68 In software e

  print(len(row[1].split(' ')), row[1])


In [40]:
submission = test_df.copy()
submission = submission[['id', 'essay']]
submission

Unnamed: 0,id,essay
0,1097671,"In healthcare, self-reliance empowers professionals to make informed decisions, while adaptability ensures responsiveness to evolving medical landscapes. Off-topic: Ice cream melts quickly. Elephants can't fly. Rainbows appear after rain. Soccer balls are round. Math is fundamental. Lions roar at night. Penguins waddle on land. Bicycles have two wheels. The importance of self-reliance in healthcare fosters a proactive approach to patient care, allowing for tailored treatments."
1,1726150,"0. Management consulting adeptly identifies underlying conflicts in marketing teams, facilitating targeted interventions that streamline operations and boost productivity.\n\n1. The ocean has seven main layers of saltwater.\n2. Roses emit a pleasant fragrance in springtime.\n3. My cat prefers sleeping on the sunny windowsill.\n4. Penguins can leap out of the water with incredible agility.\n5. Quantum physics often defies intuitive understanding.\n6. Basil is a common ingredient in Italian cuisine."
2,3211968,"In software engineering, self-reliance is paramount, as it fosters innovation and continuous learning. 0. The best essay response highlights the intrinsic motivation that self-reliant engineers possess. 1. Oranges are citrus fruits. 2. Cats often enjoy lounging in sunlight. 3. The Great Wall of China stretches over 13,000 miles. 4. Quantum computing holds promise for solving complex problems. 5. Rainbows appear after a rainstorm due to light refraction. 6."


In [41]:
submission.to_csv('submission.csv', index=False)