In [1]:
import sqlite3
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, roc_auc_score

In [None]:
def probability_to_american_odds(prob):
    """Convert a probability into American-style odds.

    The input is first clamped to the closed interval
    ``[epsilon, 1 - epsilon]`` (``epsilon = 1e-10``) so that neither
    formula below can divide by zero.

    Args:
        prob: Probability of the outcome. Values outside ``[0, 1]`` are
            not rejected; they are clamped like any other out-of-range value.

    Returns:
        A negative number, ``-100 * p / (1 - p)``, when the clamped
        probability is above 0.5; otherwise a positive number,
        ``100 * (1 - p) / p`` (so exactly 0.5 yields ``100.0``).
    """
    epsilon = 1e-10
    upper = 1 - epsilon

    # Pull extreme inputs back inside the open interval (0, 1).
    if prob > upper:
        prob = upper
    elif prob < epsilon:
        prob = epsilon

    # Outcomes more likely than not are quoted with negative odds.
    if prob > 0.5:
        return -100 * (prob / (1 - prob))

    # Everything else (including an even 0.5) is quoted with positive odds.
    return 100 * ((1 - prob) / prob)

# Starting Features

1. Rushing/Receiving Yards
2. Touchdowns Scored Average (Historical) 
3. Opponent Defense Rating
4. Team Offensive Rank
5. Matchup-Specific Metrics (e.g., Historical Performance Against Specific Opponent)
6. Weather Conditions

In [None]:
# XGBoost Skeleton Code
#
# Trains an XGBoost binary classifier on the six starting features and reports
# both hard-label accuracy and probability-based metrics (log loss, ROC AUC).

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score

# NOTE(review): `df` is not created anywhere in the visible notebook. It must be
# loaded before this cell runs (presumably from the NFL SQLite database via
# sqlite3/pandas) -- TODO confirm the source table and add a loading cell.

# Prepare the data
X = df[['rushing_receiving_yards', 'tds_scored_avg', 'opponent_defense_rating', 
        'team_offensive_rank', 'historical_vs_opponent', 'weather_conditions']]
y = df['touchdown']  # Binary target variable (1 for touchdown, 0 otherwise)

# Split the data. Stratifying on y keeps the touchdown rate the same in the
# train and test sets, which matters when the positive class is uncommon.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Initialize the XGBoost model.
# `use_label_encoder` is intentionally not passed: it is deprecated in recent
# XGBoost releases and only produces a warning there.
model = xgb.XGBClassifier(
    objective='binary:logistic', 
    eval_metric='logloss'
)

# Train the model
model.fit(X_train, y_train)

# Make predictions: hard 0/1 labels and the predicted probability of class 1
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Evaluate the model. Accuracy only scores the hard labels; log loss and
# ROC AUC score the predicted probabilities themselves.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')
print(f'Log loss: {log_loss(y_test, y_proba):.4f}')
print(f'ROC AUC: {roc_auc_score(y_test, y_proba):.4f}')


In [None]:
# Random Forest Skeleton Code
#
# Trains a Random Forest binary classifier on the six starting features and
# reports both hard-label accuracy and probability-based metrics
# (log loss, ROC AUC).

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score

# NOTE(review): `df` is not created anywhere in the visible notebook. It must be
# loaded before this cell runs (presumably from the NFL SQLite database via
# sqlite3/pandas) -- TODO confirm the source table and add a loading cell.

# Prepare the data
X = df[['rushing_receiving_yards', 'tds_scored_avg', 'opponent_defense_rating', 
        'team_offensive_rank', 'historical_vs_opponent', 'weather_conditions']]
y = df['touchdown']  # Binary target variable (1 for touchdown, 0 otherwise)

# Split the data. Stratifying on y keeps the touchdown rate the same in the
# train and test sets, which matters when the positive class is uncommon.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Initialize the Random Forest model (fixed seed so the forest is repeatable)
model = RandomForestClassifier(
    n_estimators=100, 
    random_state=42
)

# Train the model
model.fit(X_train, y_train)

# Make predictions: hard 0/1 labels and the predicted probability of class 1
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]

# Evaluate the model. Accuracy only scores the hard labels; log loss and
# ROC AUC score the predicted probabilities themselves.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')
print(f'Log loss: {log_loss(y_test, y_proba):.4f}')
print(f'ROC AUC: {roc_auc_score(y_test, y_proba):.4f}')


Here is basic skeleton code for each of the three recommended models: XGBoost, LightGBM, and Random Forest. These examples assume you're using Python with common machine learning libraries such as `scikit-learn`, `xgboost`, and `lightgbm`.

### 1. **XGBoost**
```python
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Assume X and y are your features and target variables from nfl.db
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the XGBoost model.
# `use_label_encoder` is intentionally not passed: it is deprecated in recent
# XGBoost releases and only produces a warning there.
model = xgb.XGBClassifier(
    objective='binary:logistic', 
    eval_metric='logloss'
)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')
```

### 2. **LightGBM**
```python
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# X (features) and y (target) are expected to exist already, built from nfl.db.
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# LightGBM classifier configured for a binary objective with binary log loss.
model = lgb.LGBMClassifier(objective='binary', metric='binary_logloss')

# Fit on the training split.
model.fit(X_train, y_train)

# Predict class labels for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true labels and report the result.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.4f}')
```

### 3. **Random Forest**
```python
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# X (features) and y (target) are expected to exist already, built from nfl.db.
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Forest of 100 trees, seeded so repeated runs build the same model.
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit on the training split.
model.fit(X_train, y_train)

# Predict class labels for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true labels and report the result.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy:.4f}')
```

### Key Points
- **Data Preparation:** These examples assume that you have already prepared your feature matrix `X` and target vector `y`.
- **Model Initialization:** Each model is initialized with basic parameters. You can tune these parameters to improve performance based on your specific data.
- **Training:** The models are trained using the `fit` method on the training data.
- **Prediction:** Predictions are made on the held-out test data using the `predict` method.
- **Evaluation:** Accuracy is calculated and printed as a basic performance metric. Depending on your needs, you might want to use other metrics like AUC-ROC, precision, recall, or F1-score.

You can further enhance these skeletons by adding data preprocessing steps, feature selection, cross-validation, hyperparameter tuning, and more, depending on the complexity of your task and dataset.