In [None]:
import google.generativeai as genai
import os
import pandas as pd 
from tenacity import retry, stop_after_attempt, wait_exponential


In [None]:
# Configure the Gemini model API key
genai.configure(api_key="API_KEY")
model = genai.GenerativeModel('gemini-1.5-flash')

def assess_blog(blog_text):
    """
    Function to assess a blog using the Gemini 1.5 Flash model with strict evaluation criteria.
    """
    prompt = f"""
    Conduct a rigorous evaluation of the following blog text based on the specified parameters. Apply strict marking criteria and penalize heavily for any shortcomings:

    Blog Text:
    "{blog_text}"

    Evaluation Parameters:
    1. Clarity (1-10): Assess how easily understandable the content is. Penalize for any ambiguity or confusion.
    2. Grammar and Syntax (1-10): Scrutinize for any grammatical errors or awkward phrasing. Even minor mistakes should result in point deductions.
    3. Tone Appropriateness (1-10): Evaluate if the tone consistently matches a general audience. Any inappropriate shifts in tone should be penalized.
    4. Sentence Structure and Flow (1-10): Analyze the smoothness and logical progression of ideas. Penalize for any choppiness or lack of coherence.
    5. Engagement (1-10): Determine how well the blog captures and maintains reader interest. Lack of engaging elements should result in lower scores.
    6. Conciseness (1-10): Assess for unnecessary verbosity or repetition. Penalize for any superfluous content.

    Scoring Guidelines:
    - Use the full range of scores (1-10) effectively.
    - Score 9-10: Exceptional, near-perfect performance
    - Score 7-8: Good performance with minor issues
    - Score 5-6: Average performance with noticeable flaws
    - Score 3-4: Poor performance with significant issues
    - Score 1-2: Severely lacking in this aspect

    Provide your assessment in the following strict format:
    [Clarity score],[Grammar and Syntax score],[Tone Appropriateness score],[Sentence Structure and Flow score],[Engagement score],[Conciseness score]
    Suggestions: [Detailed suggestions for improvement, highlighting specific areas that led to score deductions]

    Ensure all scores are integers. Separate scores with commas only, no spaces. Provide specific, actionable suggestions for improvement.
    """

    response = model.generate_content(prompt)
    return response.text


Blog Assessment:


In [None]:
df = pd.read_csv('processed.csv')

In [None]:
from tqdm import tqdm
import csv
if 'assessment_70b' not in df.columns: # change for every file 
    df['assessment_70b'] = None
# Function to assess and save blog dynamically
def assess_blog_save(text, id, column_name, output_file):
    """Assess the blog text and save the results dynamically to a CSV file."""
    # Assess the blog text
    assessment = assess_blog(text)
    
    # Append result to the CSV file
    with open(output_file, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow([id, assessment])
    
    return assessment

# Initialize output files with headers
with open('assessment_70b.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['id', 'assessment_70b'])
    time.sleep(5)

In [None]:

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def process_item_with_retry(item):
    batch_size = 10  
    for start in range(0, len(df), batch_size):
        end = start + batch_size
        batch = df.iloc[start:end]
        try:
            for idx, row in tqdm(batch.iterrows(), total=len(batch), desc=f"Batch {start//batch_size + 1}"):
                try:
                    if pd.notna(row['assessment_8b']):
                        df.loc[idx, 'assessment_8b'] = row['assessment_8b']
                    else:
                        df.loc[idx, 'assessment_8b'] = assess_blog_save(
                            row['meta_llama_3.1-8b-instruct'], 
                            row['id'], 
                            'assessment_8b', 
                            'assessment_8b.csv'
                        )
                except Exception as e:
                    logging.error(f"Error processing row {idx}: {str(e)}")
                    time.sleep(5)  
        except Exception as e:
            logging.error(f"Major error occurred: {str(e)}")
        except Exception as e:
            logging.error(f"Error in batch {start//batch_size + 1}: {str(e)}")
            time.sleep(5)

In [None]:
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def process_item_with_retry(item):
    batch_size = 10  
    for start in range(0, len(df), batch_size):
        end = start + batch_size
        batch = df.iloc[start:end]
        try:
            for idx, row in tqdm(batch.iterrows(), total=len(batch), desc=f"Batch {start//batch_size + 1}"):
                try:
                    if pd.notna(row['assessment_405b']):
                        df.loc[idx, 'assessment_405b'] = row['assessment_405b']
                    else:
                        df.loc[idx, 'assessment_405b'] = assess_blog_save(
                            row['meta_llama_3.1-405b-instruct'], 
                            row['id'], 
                            'assessment_405b', 
                            'assessment_405b.csv'
                        )
                except Exception as e:
                    logging.error(f"Error processing row {idx}: {str(e)}")
                    time.sleep(5)  
        except Exception as e:
            logging.error(f"Major error occurred: {str(e)}")
        except Exception as e:
            logging.error(f"Error in batch {start//batch_size + 1}: {str(e)}")
            time.sleep(5)

In [None]:
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def process_item_with_retry(item):
    batch_size = 10 
    for start in range(0, len(df), batch_size):
        end = start + batch_size
        batch = df.iloc[start:end]
        try:
            for idx, row in tqdm(batch.iterrows(), total=len(batch), desc=f"Batch {start//batch_size + 1}"):
                try:
                    if pd.notna(row['assessment_70b']):
                        df.loc[idx, 'assessment_70b'] = row['assessment_70b']
                    else:
                        df.loc[idx, 'assessment_70b'] = assess_blog_save(
                            row['meta_llama_3.1-70b-instruct'], 
                            row['id'], 
                            'assessment_70b', 
                            'assessment_70b.csv'
                        )
                except Exception as e:
                    logging.error(f"Error processing row {idx}: {str(e)}")
                    time.sleep(5) 
        except Exception as e:
            logging.error(f"Major error occurred: {str(e)}")
        except Exception as e:
            logging.error(f"Error in batch {start//batch_size + 1}: {str(e)}")
            time.sleep(5)

In [26]:
process_item_with_retry(df)

Batch 1: 100%|██████████| 10/10 [00:00<00:00, 4427.17it/s]
Batch 2: 100%|██████████| 10/10 [00:00<00:00, 6588.60it/s]
Batch 3:  70%|███████   | 7/10 [00:05<00:02,  1.30it/s]ERROR:root:Error processing row 27: 504 Deadline Exceeded
Batch 3:  80%|████████  | 8/10 [10:10<03:26, 103.19s/it]ERROR:root:Error processing row 28: 504 Deadline Exceeded
Batch 3: 100%|██████████| 10/10 [20:19<00:00, 121.93s/it]
Batch 4:  50%|█████     | 5/10 [00:24<00:24,  4.92s/it]ERROR:root:Error processing row 35: 504 Deadline Exceeded
Batch 4:  90%|█████████ | 9/10 [10:43<01:09, 69.17s/it] ERROR:root:Error processing row 39: 504 Deadline Exceeded
Batch 4: 100%|██████████| 10/10 [20:49<00:00, 124.90s/it]
Batch 5:  10%|█         | 1/10 [00:05<00:46,  5.16s/it]ERROR:root:Error processing row 41: 504 Deadline Exceeded
Batch 5:  60%|██████    | 6/10 [10:28<03:34, 53.65s/it] ERROR:root:Error processing row 46: 504 Deadline Exceeded
Batch 5: 100%|██████████| 10/10 [20:47<00:00, 124.78s/it]
Batch 6:  60%|██████    | 6

In [34]:
process_item_with_retry(df)

Batch 1: 100%|██████████| 10/10 [00:00<00:00, 4673.32it/s]
Batch 2: 100%|██████████| 10/10 [00:00<00:00, 3930.56it/s]
Batch 3: 100%|██████████| 10/10 [00:08<00:00,  1.16it/s]
Batch 4: 100%|██████████| 10/10 [00:09<00:00,  1.07it/s]
Batch 5: 100%|██████████| 10/10 [00:08<00:00,  1.13it/s]
Batch 6: 100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
Batch 7:   0%|          | 0/10 [00:00<?, ?it/s]ERROR:root:Error processing row 64: 504 Deadline Exceeded
Batch 7: 100%|██████████| 10/10 [10:10<00:00, 61.02s/it]
Batch 8: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]
Batch 9:  40%|████      | 4/10 [00:04<00:06,  1.15s/it]ERROR:root:Error processing row 87: 504 Deadline Exceeded
Batch 9: 100%|██████████| 10/10 [10:14<00:00, 61.41s/it]
Batch 10:  30%|███       | 3/10 [00:10<00:25,  3.70s/it]ERROR:root:Error processing row 94: 504 Deadline Exceeded
Batch 10: 100%|██████████| 10/10 [10:20<00:00, 62.05s/it]


In [35]:
process_item_with_retry(df)

Batch 1: 100%|██████████| 10/10 [00:00<00:00, 1725.34it/s]
Batch 2: 100%|██████████| 10/10 [00:00<00:00, 6132.02it/s]
Batch 3: 100%|██████████| 10/10 [00:00<00:00, 3201.03it/s]
Batch 4: 100%|██████████| 10/10 [00:00<00:00, 6648.13it/s]
Batch 5: 100%|██████████| 10/10 [00:00<00:00, 5321.37it/s]
Batch 6: 100%|██████████| 10/10 [00:00<00:00, 2558.13it/s]
Batch 7: 100%|██████████| 10/10 [00:06<00:00,  1.54it/s]
Batch 8: 100%|██████████| 10/10 [00:00<00:00, 4144.16it/s]
Batch 9: 100%|██████████| 10/10 [00:05<00:00,  1.75it/s]
Batch 10: 100%|██████████| 10/10 [00:04<00:00,  2.18it/s]
