In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
from transformers import AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm
import numpy as np
import json
import logging
import argparse

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class BERTGrokHybridModel(nn.Module):
    """
    Hybrid model that combines BERT's understanding with Grok's generative capabilities.
    
    Architecture:
    1. BERT encoder for contextual understanding
    2. Connection layer to bridge BERT to Grok
    3. Grok model for generation with BERT-informed context
    """
    def __init__(self, bert_model_name="bert-base-uncased", 
                 grok_api_key=None, 
                 device="cuda"):
        super(BERTGrokHybridModel, self).__init__()
        self.device = device
        
        # Load BERT for feature extraction
        logger.info(f"Loading BERT model: {bert_model_name}")
        self.bert_model = AutoModelForSequenceClassification.from_pretrained(bert_model_name)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        
        # Freeze BERT parameters (optional, can be fine-tuned if needed)
        for param in self.bert_model.parameters():
            param.requires_grad = False
            
        # Hidden dimension sizes
        self.bert_hidden_size = self.bert_model.config.hidden_size
        self.grok_hidden_size = 4096  # Grok model hidden size
        
        # Create connection layer between BERT and Grok
        self.connection_layer = nn.Sequential(
            nn.Linear(self.bert_hidden_size, 2048),
            nn.LayerNorm(2048),
            nn.GELU(),
            nn.Linear(2048, self.grok_hidden_size),
            nn.LayerNorm(self.grok_hidden_size)
        )
        
        # Grok integration via API
        self.grok_api_key = "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"
        self.grok_client = None
        if grok_api_key:
            # Initialize Grok API client
            self._initialize_grok_client()
        
    def _initialize_grok_client(self):
        """Initialize connection to Grok API"""
        try:
            import requests
            self.grok_client = True
            logger.info("Grok API client initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Grok API client: {e}")
            self.grok_client = None

    def forward(self, input_ids, attention_mask=None, labels=None):
        """
        Forward pass through the hybrid model
        
        Args:
            input_ids: Tensor of token ids
            attention_mask: Attention mask for padding
            labels: Optional labels for generation targets
            
        Returns:
            dict with loss and logits
        """
        # Get BERT embeddings
        with torch.no_grad() if not self.bert_model.training else torch.enable_grad():
            bert_outputs = self.bert_model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                output_hidden_states=True
            )
            
        # Take the hidden states from the last layer of BERT
        bert_embeddings = bert_outputs.hidden_states[-1][:, 0, :]  # [CLS] token embedding
        
        # Transform BERT embeddings to be compatible with Grok
        grok_compatible_features = self.connection_layer(bert_embeddings)
        
        # If we're in training mode with labels, calculate loss
        if self.training and labels is not None:
            # Format the data for Grok API
            batch_size = input_ids.shape[0]
            results = {
                'loss': torch.zeros(1, requires_grad=True).to(self.device),
                'logits': torch.zeros((batch_size, self.grok_hidden_size)).to(self.device)
            }
            
            # If we have the Grok API client initialized
            if self.grok_client:
                # Here we would send data to Grok API for further processing
                # This is a placeholder for the actual API call
                # In a real implementation, you would make API calls to Grok
                # and process the results
                
                # For now, we'll simulate with a dummy loss calculation
                dummy_target = torch.ones_like(grok_compatible_features)
                results['loss'] = F.mse_loss(grok_compatible_features, dummy_target)
                results['logits'] = grok_compatible_features
            
            return results
        
        # If just doing inference, return the features
        return {'logits': grok_compatible_features}

class CustomDataset(Dataset):
    """Dataset for training the hybrid model"""
    def __init__(self, data_path, tokenizer, max_length=512):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.data = self._load_data(data_path)
        
    def _load_data(self, data_path):
        """Load the data from file"""
        if not os.path.exists(data_path):
            raise FileNotFoundError(f"Data file not found: {data_path}")
            
        with open(data_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        
        logger.info(f"Loaded {len(data)} examples from {data_path}")
        return data
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        
        # Tokenize inputs
        inputs = self.tokenizer(
            item['input_text'],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        # Prepare output labels if available
        if 'output_text' in item:
            labels = self.tokenizer(
                item['output_text'],
                padding='max_length',
                truncation=True,
                max_length=self.max_length,
                return_tensors='pt'
            )
            
            return {
                'input_ids': inputs['input_ids'].squeeze(),
                'attention_mask': inputs['attention_mask'].squeeze(),
                'labels': labels['input_ids'].squeeze()
            }
        
        return {
            'input_ids': inputs['input_ids'].squeeze(),
            'attention_mask': inputs['attention_mask'].squeeze()
        }

class GrokAPIHandler:
    """
    Handler for Grok API interactions
    """
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://api.grok.ai/v1"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
    
    def fine_tune_model(self, training_data, validation_data=None, model_name="grok-1", 
                        epochs=3, batch_size=16, learning_rate=3e-5):
        """
        Send a fine-tuning request to the Grok API
        
        Args:
            training_data: Path to training data file
            validation_data: Optional path to validation data
            model_name: Base model to fine-tune
            epochs: Number of training epochs
            batch_size: Training batch size
            learning_rate: Learning rate for training
            
        Returns:
            Fine-tuning job ID
        """
        import requests
        
        # Prepare the fine-tuning request payload
        payload = {
            "model": model_name,
            "training_file": training_data,
            "hyperparameters": {
                "epochs": epochs,
                "batch_size": batch_size,
                "learning_rate": learning_rate
            }
        }
        
        if validation_data:
            payload["validation_file"] = validation_data
            
        try:
            response = requests.post(
                f"{self.base_url}/fine-tunes",
                headers=self.headers,
                json=payload
            )
            
            if response.status_code == 200:
                return response.json()
            else:
                logger.error(f"Error in fine-tuning request: {response.text}")
                return None
                
        except Exception as e:
            logger.error(f"Exception during API request: {e}")
            return None
    
    def check_fine_tune_status(self, job_id):
        """Check the status of a fine-tuning job"""
        import requests
        
        try:
            response = requests.get(
                f"{self.base_url}/fine-tunes/{job_id}",
                headers=self.headers
            )
            
            if response.status_code == 200:
                return response.json()
            else:
                logger.error(f"Error checking job status: {response.text}")
                return None
                
        except Exception as e:
            logger.error(f"Exception during status check: {e}")
            return None

def prepare_data_for_grok_api(input_data, output_file):
    """
    Convert data to the format expected by Grok API
    
    Args:
        input_data: Path to input data file
        output_file: Path to save the formatted data
    """
    with open(input_data, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    # Format data for Grok API fine-tuning
    formatted_data = []
    for item in data:
        formatted_item = {
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": item['input_text']},
                {"role": "assistant", "content": item['output_text']}
            ]
        }
        formatted_data.append(formatted_item)
    
    # Save the formatted data
    with open(output_file, 'w', encoding='utf-8') as f:
        for item in formatted_data:
            f.write(json.dumps(item) + '\n')
    
    logger.info(f"Prepared {len(formatted_data)} examples for Grok API, saved to {output_file}")
    return output_file

def train_hybrid_model(model, train_dataloader, validation_dataloader=None, 
                     epochs=3, learning_rate=5e-5, warmup_steps=0,
                     device="cuda", save_path="./hybrid_model"):
    """
    Train the hybrid model
    
    Args:
        model: BERTGrokHybridModel instance
        train_dataloader: DataLoader for training data
        validation_dataloader: Optional DataLoader for validation
        epochs: Number of training epochs
        learning_rate: Learning rate
        warmup_steps: Number of warmup steps for scheduler
        device: Device to train on ('cuda' or 'cpu')
        save_path: Path to save the model
    """
    model.to(device)
    
    # Prepare optimizer and scheduler
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer, 
        num_warmup_steps=warmup_steps, 
        num_training_steps=total_steps
    )
    
    # Training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        
        # Training
        train_iterator = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        for batch in train_iterator:
            # Move batch to device
            batch = {k: v.to(device) for k, v in batch.items()}
            
            # Forward pass
            outputs = model(
                input_ids=batch['input_ids'],
                attention_mask=batch['attention_mask'],
                labels=batch.get('labels')
            )
            
            loss = outputs['loss']
            total_loss += loss.item()
            
            # Backward pass
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
            
            # Update progress bar
            train_iterator.set_postfix({"loss": loss.item()})
        
        avg_train_loss = total_loss / len(train_dataloader)
        logger.info(f"Average training loss: {avg_train_loss:.4f}")
        
        # Validation
        if validation_dataloader:
            model.eval()
            eval_loss = 0
            
            with torch.no_grad():
                for batch in tqdm(validation_dataloader, desc="Validation"):
                    batch = {k: v.to(device) for k, v in batch.items()}
                    outputs = model(
                        input_ids=batch['input_ids'],
                        attention_mask=batch['attention_mask'],
                        labels=batch.get('labels')
                    )
                    eval_loss += outputs['loss'].item()
            
            avg_eval_loss = eval_loss / len(validation_dataloader)
            logger.info(f"Validation loss: {avg_eval_loss:.4f}")
    
    # Save the model
    os.makedirs(save_path, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(save_path, "hybrid_model.pt"))
    logger.info(f"Model saved to {save_path}")

def main():
    parser = argparse.ArgumentParser(description="Train BERT+Grok hybrid model")
    parser.add_argument("--data_path", type=str, required=True, help="Path to training data JSON")
    parser.add_argument("--grok_api_key", type=str, required=True, help="Grok API key")
    parser.add_argument("--bert_model", type=str, default="bert-base-uncased", help="BERT model name")
    parser.add_argument("--epochs", type=int, default=3, help="Number of training epochs")
    parser.add_argument("--batch_size", type=int, default=8, help="Training batch size")
    parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate")
    parser.add_argument("--max_length", type=int, default=512, help="Maximum sequence length")
    parser.add_argument("--output_dir", type=str, default="./hybrid_model", help="Output directory")
    parser.add_argument("--use_cpu", action="store_true", help="Use CPU instead of CUDA")
    parser.add_argument("--api_only", action="store_true", help="Use only Grok API for fine-tuning")
    
    args = parser.parse_args()
    
    # Determine device
    device = "cpu" if args.use_cpu or not torch.cuda.is_available() else "cuda"
    logger.info(f"Using device: {device}")
    
    # If using API-only mode
    if args.api_only:
        logger.info("Using Grok API only for fine-tuning")
        api_handler = GrokAPIHandler(args.grok_api_key)
        
        # Prepare data for API
        grok_formatted_data = prepare_data_for_grok_api(
            args.data_path, 
            os.path.join(args.output_dir, "grok_formatted_data.jsonl")
        )
        
        # Start fine-tuning job
        job_result = api_handler.fine_tune_model(
            training_data=grok_formatted_data,
            model_name="grok-1",
            epochs=args.epochs,
            batch_size=args.batch_size,
            learning_rate=args.learning_rate
        )
        
        if job_result:
            logger.info(f"Fine-tuning job started with ID: {job_result.get('id')}")
            logger.info("You can check the status with:")
            logger.info(f"  python -c \"from your_module import GrokAPIHandler; handler = GrokAPIHandler('{args.grok_api_key}'); print(handler.check_fine_tune_status('{job_result.get('id')}'))\"")
        else:
            logger.error("Failed to start fine-tuning job")
            
    else:
        # Initialize tokenizer from BERT
        tokenizer = AutoTokenizer.from_pretrained(args.bert_model)
        
        # Create dataset and dataloaders
        dataset = CustomDataset(args.data_path, tokenizer, max_length=args.max_length)
        
        # Split dataset into train/validation
        train_size = int(0.9 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
        
        train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
        val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size)
        
        # Initialize model
        model = BERTGrokHybridModel(
            bert_model_name=args.bert_model,
            grok_api_key=args.grok_api_key,
            device=device
        )
        
        # Train model
        train_hybrid_model(
            model=model,
            train_dataloader=train_dataloader,
            validation_dataloader=val_dataloader,
            epochs=args.epochs,
            learning_rate=args.learning_rate,
            device=device,
            save_path=args.output_dir
        )

if __name__ == "__main__":
    main()

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import AdamW, get_linear_schedule_with_warmup
import json
import requests
import logging
import argparse

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class BERTGrokHybrid(nn.Module):
    """Hybrid model combining BERT with Grok API"""
    def __init__(self, bert_model="bert-base-uncased", 
                 grok_api_key="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"):
        super(BERTGrokHybrid, self).__init__()
        
        # BERT for feature extraction
        self.bert = AutoModelForSequenceClassification.from_pretrained(bert_model)
        self.tokenizer = AutoTokenizer.from_pretrained(bert_model)
        
        # Bridge between BERT and Grok
        self.bridge = nn.Sequential(
            nn.Linear(self.bert.config.hidden_size, 2048),
            nn.LayerNorm(2048),
            nn.GELU(),
            nn.Linear(2048, 4096)  # Grok's expected input size
        )
        
        # Grok API key
        self.grok_api_key = grok_api_key
        self.headers = {
            "Authorization": f"Bearer {grok_api_key}",
            "Content-Type": "application/json"
        }

    def forward(self, input_ids, attention_mask=None):
        # Get BERT features
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True
        )
        
        # Extract [CLS] token embeddings
        bert_features = outputs.hidden_states[-1][:, 0, :]
        
        # Transform for Grok compatibility
        grok_features = self.bridge(bert_features)
        
        return grok_features
    
    def fine_tune_with_grok(self, input_data):
        """Send data to Grok API for fine-tuning"""
        url = "https://api.grok.ai/v1/fine-tuning"
        
        response = requests.post(
            url,
            headers=self.headers,
            json={"training_data": input_data, "model": "grok-1"}
        )
        
        if response.status_code == 200:
            return response.json()
        else:
            logger.error(f"Grok API error: {response.text}")
            return None

class CustomDataset(Dataset):
    """Simple dataset for the hybrid model"""
    def __init__(self, data_path, tokenizer, max_length=512):
        self.tokenizer = tokenizer
        self.max_length = max_length
        
        # Load data
        with open(data_path, 'r', encoding='utf-8') as f:
            self.data = json.load(f)
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        
        # Tokenize input
        encoded = self.tokenizer(
            item['input_text'],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        result = {
            'input_ids': encoded['input_ids'].squeeze(),
            'attention_mask': encoded['attention_mask'].squeeze(),
        }
        
        # Add labels if available
        if 'output_text' in item:
            result['output_text'] = item['output_text']
            
        return result

def prepare_for_grok_api(dataset):
    """Format data for Grok API"""
    formatted_data = []
    
    for item in dataset.data:
        formatted_item = {
            "messages": [
                {"role": "user", "content": item['input_text']},
                {"role": "assistant", "content": item['output_text']}
            ]
        }
        formatted_data.append(formatted_item)
    
    return formatted_data

def main():
    parser = argparse.ArgumentParser(description="BERT+Grok hybrid fine-tuning")
    parser.add_argument("--data_path", required=True, help="sample_train_data.csv")
    parser.add_argument("--grok_api_key", required=True, help="Grok API key")
    parser.add_argument("--bert_model", default="bert-base-uncased", help="BERT model name")
    parser.add_argument("--api_only", action="store_true", help="Use only Grok API")
    args = parser.parse_args()
    
    # Initialize model and tokenizer
    model = BERTGrokHybrid(args.bert_model, args.grok_api_key)
    dataset = CustomDataset(args.data_path, model.tokenizer)
    
    # API-only mode: just use Grok for fine-tuning
    if args.api_only:
        logger.info("Using Grok API for fine-tuning")
        formatted_data = prepare_for_grok_api(dataset)
        result = model.fine_tune_with_grok(formatted_data)
        
        if result:
            logger.info(f"Fine-tuning job started: {result.get('id')}")
        else:
            logger.error("Failed to start fine-tuning")
        return
    
    # Hybrid model training with local BERT + Grok API
    dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
    
    # Set up optimizer
    optimizer = AdamW(model.parameters(), lr=3e-5)
    
    # Training loop demonstration (simplified)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    
    for epoch in range(3):
        for batch in dataloader:
            # Move batch to device
            batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v 
                    for k, v in batch.items()}
            
            # Get BERT+bridge features
            features = model(batch['input_ids'], batch['attention_mask'])
            
            # For actual training, you would:
            # 1. Send these features to Grok API
            # 2. Get prediction/loss from Grok
            # 3. Backpropagate through the model
            
            # Placeholder for demonstration
            dummy_loss = features.mean()
            dummy_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
    
    # Save the model
    torch.save(model.state_dict(), "bert_grok_hybrid.pt")
    logger.info("Model saved to bert_grok_hybrid.pt")

if __name__ == "__main__":
    main()

ImportError: cannot import name 'AdamW' from 'transformers' (c:\Users\aravi\AppData\Local\Programs\Python\Python312\Lib\site-packages\transformers\__init__.py)

In [6]:
import torch
import torch.nn as nn
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import requests

# ✅ Your Grok API key (hardcoded)
GROK_API_KEY = "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"
HEADERS = {
    "Authorization": f"Bearer {GROK_API_KEY}",
    "Content-Type": "application/json"
}

# ✅ CSV Dataset Loader
class WikiHowCSVDataset:
    def __init__(self, csv_path, tokenizer, max_length=512):
        self.data = pd.read_csv(csv_path, encoding="ISO-8859-1").dropna()
        self.tokenizer = tokenizer
        self.max_length = max_length

    def tokenize(self):
        inputs = []
        for _, row in self.data.iterrows():
            tokens = self.tokenizer(
                row["input_text"],
                padding="max_length",
                truncation=True,
                max_length=self.max_length,
                return_tensors="pt"
            )
            inputs.append({
                "input_ids": tokens["input_ids"].squeeze(),
                "attention_mask": tokens["attention_mask"].squeeze(),
                "output_text": row["output_text"],
                "input_text": row["input_text"]
            })
        return inputs

# ✅ BERT + Grok projection bridge
class BERTGrokHybrid(nn.Module):
    def __init__(self, model_name="bert-base-uncased"):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.bridge = nn.Sequential(
            nn.Linear(self.bert.config.hidden_size, 2048),
            nn.ReLU(),
            nn.Linear(2048, 4096)  # Match Grok input size
        )

    def forward(self, input_ids, attention_mask):
        with torch.no_grad():
            last_hidden = self.bert(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
        return self.bridge(last_hidden[:, 0])  # [CLS] token

# ✅ Format data for Grok fine-tuning
def prepare_for_grok(data_items):
    return [
        {
            "messages": [
                {"role": "user", "content": item["input_text"]},
                {"role": "assistant", "content": item["output_text"]}
            ]
        }
        for item in data_items
    ]

# ✅ Grok API fine-tuning call
def fine_tune_with_grok(formatted_data):
    response = requests.post(
        "https://api.grok.ai/v1/fine-tuning",
        headers=HEADERS,
        json={"training_data": formatted_data, "model": "grok-1"}, 
        verify= False
    )
    if response.status_code == 200:
        print("✅ Fine-tuning started:", response.json().get("id"))
    else:
        print("❌ Error:", response.text)

# ✅ End-to-end pipeline
def run_pipeline(csv_path="wikihow.csv"):
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    dataset = WikiHowCSVDataset(csv_path, tokenizer)
    tokenized_data = dataset.tokenize()
    formatted_data = prepare_for_grok(tokenized_data)
    fine_tune_with_grok(formatted_data)

run_pipeline("C:/Users/aravi/Downloads/archive (2)/wikihowAll.csv")

  self.data = pd.read_csv(csv_path, encoding="ISO-8859-1").dropna()


SSLError: HTTPSConnectionPool(host='api.grok.ai', port=443): Max retries exceeded with url: /v1/fine-tuning (Caused by SSLError(SSLError(1, '[SSL: TLSV1_UNRECOGNIZED_NAME] tlsv1 unrecognized name (_ssl.c:1000)')))