In [2]:
# Step 1: Load and Preprocess the CSV File

import pandas as pd

# Read the scraped articles into a DataFrame
file_path = '/Users/hemantg/Desktop/dl-scraped-final-rp.csv'  # Update this path
df = pd.read_csv(file_path)

# Preview the raw rows before any cleaning
print("Original Data:")
print(df.head())

# Preprocessing
# Trim leading/trailing whitespace, then collapse every whitespace run
# (spaces, tabs, newlines) inside the text to a single space.
df['title'] = df['title'].str.strip().replace(to_replace=r'\s+', value=' ', regex=True)
df['content'] = df['content'].str.strip().replace(to_replace=r'\s+', value=' ', regex=True)

# Parse the date strings day-first; errors='coerce' turns unparseable values into NaT
df['date'] = pd.to_datetime(df['date'], dayfirst=True, errors='coerce')

# Discard rows whose date is missing (NaT), order the rest chronologically,
# and renumber the index from 0.
df = (
    df.dropna(subset=['date'])
      .sort_values(by='date')
      .reset_index(drop=True)
)

# Preview the cleaned rows
print("\nPreprocessed Data:")
print(df.head())



Original Data:
                                                 url  \
0  https://www.espncricinfo.com/series/indian-pre...   
1  https://www.espncricinfo.com/series/indian-pre...   
2  https://www.espncricinfo.com/series/indian-pre...   
3  https://www.espncricinfo.com/series/indian-pre...   
4  https://www.espncricinfo.com/series/indian-pre...   

                                     title       date  \
0                 Lights up over a new era  17-Apr-08   
1         McCullum's record 158 leads rout  18-Apr-08   
2  Magnificent Hussey inspires Chennai win  19-Apr-08   
3        Gambhir and Dhawan seal Delhi win  19-Apr-08   
4   Hussey clinches nail-biter for Kolkata  20-Apr-08   

                                             content Unnamed: 4 Unnamed: 5  \
0  Packing plenty of oomph: Cameron White's big-h...        NaN        NaN   
1  Kolkata Knight Riders222 for 3 (McCullum 158*)...        NaN        NaN   
2  Chennai Super Kings240 for 5 (Hussey 116*, Rai...        NaN        

  df['date'] = pd.to_datetime(df['date'], errors='coerce', dayfirst=True)


In [3]:
# Step 1: Load and Preprocess the CSV File

import pandas as pd

# Load the CSV file
file_path = '/Users/hemantg/Desktop/dl-scraped-final-rp.csv'  # Update this path
df = pd.read_csv(file_path)

# Remove unnamed columns (pandas labels header-less CSV columns "Unnamed: N").
# .copy() gives an independent frame, so the column assignments below can never
# be treated as writes into a slice of the raw frame.
df = df.loc[:, ~df.columns.str.contains('^Unnamed')].copy()

# Display the first few rows
print("Original Data (Cleaned):")
print(df.head())

# Preprocessing
# Remove extra whitespace and newlines in text columns:
# trim the ends, then collapse every internal whitespace run to one space.
df['title'] = df['title'].str.strip().replace(r'\s+', ' ', regex=True)
df['content'] = df['content'].str.strip().replace(r'\s+', ' ', regex=True)

# Standardize the date format explicitly (e.g. "17-Apr-08").
# errors='coerce' turns anything that does not match the format into NaT.
df['date'] = pd.to_datetime(df['date'], format='%d-%b-%y', errors='coerce')

# Drop rows with invalid dates, but say how many were lost so a format mismatch
# in the scrape cannot silently shrink the dataset.
invalid_date_count = int(df['date'].isna().sum())
if invalid_date_count:
    print(f"Dropping {invalid_date_count} row(s) whose date is missing or not in DD-Mon-YY format.")
df = df.dropna(subset=['date'])

# Sort the DataFrame by date.
# mergesort is stable: articles published on the same day keep their original
# file order, so row positions (used later as article indices and batch
# boundaries) are reproducible from run to run.
df = df.sort_values(by='date', kind='mergesort').reset_index(drop=True)

# Display preprocessed data
print("\nPreprocessed Data:")
print(df.head())



Original Data (Cleaned):
                                                 url  \
0  https://www.espncricinfo.com/series/indian-pre...   
1  https://www.espncricinfo.com/series/indian-pre...   
2  https://www.espncricinfo.com/series/indian-pre...   
3  https://www.espncricinfo.com/series/indian-pre...   
4  https://www.espncricinfo.com/series/indian-pre...   

                                     title       date  \
0                 Lights up over a new era  17-Apr-08   
1         McCullum's record 158 leads rout  18-Apr-08   
2  Magnificent Hussey inspires Chennai win  19-Apr-08   
3        Gambhir and Dhawan seal Delhi win  19-Apr-08   
4   Hussey clinches nail-biter for Kolkata  20-Apr-08   

                                             content  
0  Packing plenty of oomph: Cameron White's big-h...  
1  Kolkata Knight Riders222 for 3 (McCullum 158*)...  
2  Chennai Super Kings240 for 5 (Hussey 116*, Rai...  
3  Delhi Daredevils132 for 1 (Gambhir 58*, Dhawan...  
4  Kolkata Knight Ri

In [None]:
import json
import os
import time

import pandas as pd
import requests

# Define the Grok API Key and Endpoint
# The key is read from the environment so the secret is never stored in the
# notebook (and therefore never ends up in version control or shared exports).
# Any key that was previously pasted here should be treated as leaked and revoked.
API_KEY = os.environ.get("XAI_API_KEY", "")
if not API_KEY:
    # Fail fast: without a key every request would be rejected and each article
    # would be recorded as an error instead of stopping the run.
    raise RuntimeError(
        "XAI_API_KEY is not set. Export it in the environment that launches "
        "Jupyter (e.g. `export XAI_API_KEY=...`) and restart the kernel."
    )
API_URL = "https://api.x.ai/v1/chat/completions"

# Function to Extract Contextual and Context-Discovered Features Using Grok
def extract_dynamic_context_grok(text, row_index, retries=3, retry_delay=30, timeout=60):
    """Ask the Grok chat-completions endpoint to extract features from one article.

    Args:
        text: Article body that is interpolated into the prompt.
        row_index: Identifier of the article, used only in progress/error messages.
        retries: Maximum number of request attempts before giving up.
        retry_delay: Seconds to wait between attempts after a transient failure.
        timeout: Seconds to wait for the HTTP request before treating it as a
            transient failure.

    Returns:
        The model's reply text on success; the string "Error in response" when the
        API answers with a non-retryable status or an unexpected body; the string
        "Max retries exceeded" when every attempt hit a transient failure.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    # New Prompt for Feature Discovery
    prompt = f"""
    You are a cricket performance analyst. Extract features from this cricket article related to predicting a player's future performance.

    1. Extract the following features:
       - Player Mentioned
       - Runs Scored (if any)
       - Wickets Taken (if any)
       - Match Context (location, opponent)
       - Mention of Injuries or Selection Updates
       - Predicted Player Confidence Level (0 to 1)
       - Reason for Praise or Criticism (short text)

    2. Additionally, look for **context-specific features** related to predicting the player's future performance:
       - Emerging performance indicators (e.g., tactical mentions, team dynamics).
       - Strategy hints (e.g., promotion in batting order, bowling changes).
       - Important insights **not listed above**.

    3. Provide a **short justification** explaining why these features were selected.

    Article: {text}
    """

    payload = {
        "model": "grok-beta",
        "messages": [
            {"role": "system", "content": "You are a cricket performance analyst."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0
    }

    # Rate limiting plus the gateway/server statuses that are normally transient.
    retryable_statuses = (429, 500, 502, 503, 504)

    for attempt in range(1, retries + 1):
        try:
            # Without a timeout a stalled connection would block the batch forever.
            response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        except requests.exceptions.RequestException as exc:
            # Connection resets, DNS failures and timeouts are treated like a
            # server error: log and fall through to the retry wait below, so one
            # network blip does not abort a whole batch.
            print(f"Network error at index {row_index}: {exc}")
        else:
            if response.status_code == 200:
                try:
                    completion_text = response.json()["choices"][0]["message"]["content"]
                except (ValueError, KeyError, IndexError, TypeError) as exc:
                    # HTTP 200 but the body is not the expected JSON shape.
                    print(f"Malformed API response at index {row_index}: {exc!r}")
                    return "Error in response"
                print(f"Processed article {row_index}: Extracted Contextual Features")
                return completion_text

            if response.status_code not in retryable_statuses:
                # Client-side problems (bad key, bad request, ...) will not fix
                # themselves, so do not retry.
                print(f"API Error at index {row_index}: {response.status_code}, {response.text}")
                return "Error in response"

            print(f"Rate limit or server error at index {row_index} (HTTP {response.status_code}).")

        # Only wait when another attempt will actually follow.
        if attempt < retries:
            print(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    return "Max retries exceeded"

# Batch Processing with Date Sorting
batch_size = 500

# Sentinel strings extract_dynamic_context_grok returns instead of features.
failure_markers = ["Error in response", "Max retries exceeded"]

for i in range(0, len(df), batch_size):
    batch_path = f'/Users/hemantg/Desktop/sentiment_analysis_batch_rp_{i}.csv'

    # Resume support: a batch whose CSV already exists was completed by an
    # earlier run, so skip it rather than paying for the same API calls again.
    # Delete the file to force that batch to be recomputed.
    if os.path.exists(batch_path):
        print(f"Batch {i} already saved at {batch_path}; skipping.")
        continue

    batch_df = df.iloc[i:i+batch_size].copy()

    # Extract Contextual Features from Each Article
    # (row.name is the row's index label and is used only for log messages).
    batch_df['dynamic_features'] = batch_df.apply(
        lambda row: extract_dynamic_context_grok(row['content'], row.name), axis=1
    )

    # Surface rows that came back as error sentinels so they are not mistaken
    # for real features further down the pipeline.
    failed_count = int(batch_df['dynamic_features'].isin(failure_markers).sum())
    if failed_count:
        print(f"Warning: {failed_count} article(s) in batch {i} have no extracted features.")

    # Ensure Sorting by Date
    batch_df = batch_df.sort_values(by='date', ascending=True)

    # Save Progress After Each Batch
    batch_df.to_csv(batch_path, index=False)
    print(f"Saved batch {i} with dynamic context features to CSV (sorted by date).")

print("\nDynamic Contextual Feature Extraction and Date Sorting Completed Successfully!")


Processed article 0: Extracted Contextual Features
Processed article 1: Extracted Contextual Features
Processed article 2: Extracted Contextual Features
Processed article 3: Extracted Contextual Features
Processed article 4: Extracted Contextual Features
Processed article 5: Extracted Contextual Features
Processed article 6: Extracted Contextual Features
Processed article 7: Extracted Contextual Features
Processed article 8: Extracted Contextual Features
Processed article 9: Extracted Contextual Features
Processed article 10: Extracted Contextual Features
Processed article 11: Extracted Contextual Features
Processed article 12: Extracted Contextual Features
Processed article 13: Extracted Contextual Features
Processed article 14: Extracted Contextual Features
Processed article 15: Extracted Contextual Features
Processed article 16: Extracted Contextual Features
Processed article 17: Extracted Contextual Features
Processed article 18: Extracted Contextual Features
Processed article 19: 

Processed article 157: Extracted Contextual Features
Processed article 158: Extracted Contextual Features
Processed article 159: Extracted Contextual Features
Processed article 160: Extracted Contextual Features
Processed article 161: Extracted Contextual Features
Processed article 162: Extracted Contextual Features
Processed article 163: Extracted Contextual Features
Processed article 164: Extracted Contextual Features
Processed article 165: Extracted Contextual Features
Processed article 166: Extracted Contextual Features
Processed article 167: Extracted Contextual Features
Processed article 168: Extracted Contextual Features
Processed article 169: Extracted Contextual Features
Processed article 170: Extracted Contextual Features
Processed article 171: Extracted Contextual Features
Processed article 172: Extracted Contextual Features
Processed article 173: Extracted Contextual Features
Processed article 174: Extracted Contextual Features
Processed article 175: Extracted Contextual Fe

Processed article 311: Extracted Contextual Features
Processed article 312: Extracted Contextual Features
Processed article 313: Extracted Contextual Features
Processed article 314: Extracted Contextual Features
Processed article 315: Extracted Contextual Features
Processed article 316: Extracted Contextual Features
Processed article 317: Extracted Contextual Features
Processed article 318: Extracted Contextual Features
Processed article 319: Extracted Contextual Features
Processed article 320: Extracted Contextual Features
Processed article 321: Extracted Contextual Features
Processed article 322: Extracted Contextual Features
Processed article 323: Extracted Contextual Features
Processed article 324: Extracted Contextual Features
Processed article 325: Extracted Contextual Features
Processed article 326: Extracted Contextual Features
Processed article 327: Extracted Contextual Features
Processed article 328: Extracted Contextual Features
Processed article 329: Extracted Contextual Fe

Processed article 464: Extracted Contextual Features
Processed article 465: Extracted Contextual Features
Processed article 466: Extracted Contextual Features
Processed article 467: Extracted Contextual Features
Processed article 468: Extracted Contextual Features
Processed article 469: Extracted Contextual Features
Processed article 470: Extracted Contextual Features
Processed article 471: Extracted Contextual Features
Processed article 472: Extracted Contextual Features
Processed article 473: Extracted Contextual Features
Processed article 474: Extracted Contextual Features
Processed article 475: Extracted Contextual Features
Processed article 476: Extracted Contextual Features
Processed article 477: Extracted Contextual Features
Processed article 478: Extracted Contextual Features
Processed article 479: Extracted Contextual Features
Processed article 480: Extracted Contextual Features
Processed article 481: Extracted Contextual Features
Processed article 482: Extracted Contextual Fe