In [3]:
pip install tensorflow


Defaulting to user installation because normal site-packages is not writeable
Collecting tensorflow
  Downloading tensorflow-2.18.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (4.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.1.24-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any

In [1]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import StandardScaler


Matplotlib is building the font cache; this may take a moment.


In [None]:
# -------------------------------
# 1. Load and Process JSON Data
# -------------------------------
def load_json_data(file_path):
    """Read a JSON file and return its parsed content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (whatever ``json.load`` produces for the
        file's top-level element).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Explicit UTF-8 so the result does not depend on the platform's default
    # text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

In [None]:

# -------------------------------
# 2. Tokenizer for Math & Story Problems
# -------------------------------
def preprocess_data(json_data):
    """Tokenize and pad the math and story problems found in ``json_data``.

    A single tokenizer is fitted on both kinds of text so they share one
    vocabulary, and both sets of sequences are post-padded to the length of
    the longest sequence in either set.

    Args:
        json_data: Iterable of items; the text under an item's ``'math'`` key
            is treated as a math problem and the text under ``'story'`` as a
            story problem. Items lacking a key are skipped for that set.

    Returns:
        Tuple ``(math_sequences, story_sequences, tokenizer)`` holding the two
        padded sequence arrays and the fitted tokenizer.

    Raises:
        ValueError: If no item provides a ``'math'`` or ``'story'`` entry.
    """
    math_problems = [item['math'] for item in json_data if 'math' in item]
    story_problems = [item['story'] for item in json_data if 'story' in item]

    # Fail early with a clear message; otherwise max() below raises an opaque
    # "empty sequence" ValueError.
    if not math_problems and not story_problems:
        raise ValueError(
            "json_data contains no 'math' or 'story' entries to preprocess"
        )

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(math_problems + story_problems)

    math_sequences = tokenizer.texts_to_sequences(math_problems)
    story_sequences = tokenizer.texts_to_sequences(story_problems)

    # Pad both sets to one common length so they can share a model input shape.
    max_length = max(len(seq) for seq in math_sequences + story_sequences)
    math_sequences = pad_sequences(math_sequences, maxlen=max_length, padding='post')
    story_sequences = pad_sequences(story_sequences, maxlen=max_length, padding='post')

    return math_sequences, story_sequences, tokenizer


In [None]:

# -------------------------------
# 3. Drake's Formula for Numerical Processing
# -------------------------------
def apply_drakes_formula(data):
    """
    Applies Drake's Formula-style weighting to numerical data.
    This is just an initial formula structure that can be adjusted.

    Every value is multiplied by ``1.0 + 0.1 * num_factors`` (where
    ``num_factors`` is the length of the first row) and the result is divided
    by the largest scaled value.

    Args:
        data: Sequence of equal-length numeric rows (nested list or array).

    Returns:
        numpy.ndarray with the weighted, normalized values. Empty input yields
        an empty array; if the largest scaled value is 0 the scaled data is
        returned without normalization to avoid dividing by zero.
    """
    num_factors = len(data[0]) if len(data) > 0 else 1
    multiplier = 1.0 + (num_factors * 0.1)

    scaled_data = np.array(data) * multiplier

    # np.max raises on an empty array, so there is nothing to normalize.
    if scaled_data.size == 0:
        return scaled_data

    normalization_factor = np.max(scaled_data)
    # A zero maximum would turn the division into NaN/inf values.
    if normalization_factor == 0:
        return scaled_data

    # NOTE(review): the multiplier is positive and uniform, so it cancels out
    # in this division; the result equals data / max(data). Confirm intent.
    return scaled_data / normalization_factor

In [None]:
# -------------------------------
# 4. Neural Network Model for Predictions
# -------------------------------
def build_model(input_dim, output_dim=1):
    """Build and compile the stacked-LSTM classifier.

    Architecture: token embedding (64 dims) -> LSTM(64, returning the full
    sequence) -> LSTM(32) -> Dense(16, ReLU) -> Dense(output_dim, sigmoid).
    The model is compiled with the Adam optimizer, binary cross-entropy loss
    and an accuracy metric.

    Args:
        input_dim: Vocabulary size handed to the embedding layer.
        output_dim: Number of sigmoid output units (default 1).

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        # No fixed input_length: the argument is deprecated in Keras 3, and a
        # hard-coded 100 disagrees with data padded to the dataset's own
        # maximum length. The layers below accept any sequence length.
        Embedding(input_dim=input_dim, output_dim=64),  # Token embedding layer
        LSTM(64, return_sequences=True),
        LSTM(32),
        Dense(16, activation='relu'),
        Dense(output_dim, activation='sigmoid')  # Binary prediction output
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [None]:

# -------------------------------
# 5. Training the Model
# -------------------------------
def train_model(json_file):
    """Load the training JSON, preprocess it and fit the model.

    The first 80% of the math sequences are used for training and the rest
    for validation. Labels are all-ones placeholders for now.

    Args:
        json_file: Path of the JSON file with the training items.

    Returns:
        Tuple ``(model, tokenizer)`` with the fitted model and the tokenizer
        produced during preprocessing.

    Raises:
        ValueError: If fewer than two math sequences are available, since the
            80/20 split would leave the training set empty.
    """
    json_data = load_json_data(json_file)
    math_data, _, tokenizer = preprocess_data(json_data)

    # The model's first layer is an Embedding, which looks tokens up by integer
    # id. Rescaling the ids into [0, 1] floats (as apply_drakes_formula does)
    # would collapse nearly every token to index 0, so the raw ids are used.
    if len(math_data) < 2:
        raise ValueError(
            "Need at least two math problems to build training and validation sets"
        )

    # Splitting into training & validation sets
    split_idx = int(len(math_data) * 0.8)
    x_train, x_val = math_data[:split_idx], math_data[split_idx:]
    y_train, y_val = np.ones(len(x_train)), np.ones(len(x_val))  # Placeholder labels for now

    # Train model; +1 because the tokenizer's word ids start at 1 and id 0 is
    # the padding value.
    model = build_model(input_dim=len(tokenizer.word_index) + 1)
    model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_val, y_val))

    return model, tokenizer


In [None]:

# -------------------------------
# 6. Save & Load Model Functions
# -------------------------------
def save_model(model, tokenizer, model_path="ai_model.h5", tokenizer_path="tokenizer.json"):
    """Persist the model and the tokenizer's vocabulary to disk.

    Args:
        model: Object exposing ``save(path)``; called with ``model_path``.
        tokenizer: Object whose ``word_index`` mapping is written as JSON.
        model_path: Destination passed to ``model.save``.
        tokenizer_path: Destination file for the JSON-encoded ``word_index``.
    """
    model.save(model_path)

    # Only the word index is stored; no other tokenizer state is written.
    with open(tokenizer_path, 'w') as vocab_file:
        json.dump(obj=tokenizer.word_index, fp=vocab_file)

def load_saved_model(model_path="ai_model.h5", tokenizer_path="tokenizer.json"):
    """Restore a saved model together with a tokenizer rebuilt from its vocabulary.

    Args:
        model_path: Location handed to ``tf.keras.models.load_model``.
        tokenizer_path: JSON file containing a tokenizer ``word_index`` mapping.

    Returns:
        Tuple ``(model, tokenizer)``. The tokenizer is a fresh ``Tokenizer``
        whose ``word_index`` is replaced by the stored mapping; nothing else
        about it is restored.
    """
    restored_model = tf.keras.models.load_model(model_path)

    with open(tokenizer_path, 'r') as vocab_file:
        vocabulary = json.load(vocab_file)

    restored_tokenizer = Tokenizer()
    restored_tokenizer.word_index = vocabulary
    return restored_model, restored_tokenizer

In [None]:

# -------------------------------
# 7. Prediction Function
# -------------------------------
def predict(input_text, model, tokenizer, max_length=100):
    """Classify a single text as ``"Positive"`` or ``"Negative"``.

    Args:
        input_text: The raw text to classify.
        model: Model exposing ``predict``; the first value of its first output
            row is read as the score.
        tokenizer: Tokenizer used to turn the text into a token-id sequence.
        max_length: Length the sequence is post-padded to before prediction.
            Defaults to 100, the value previously hard-coded here; pass the
            padding length used at training time to keep inputs consistent.

    Returns:
        ``"Positive"`` if the score is strictly greater than 0.5, otherwise
        ``"Negative"``.
    """
    sequence = tokenizer.texts_to_sequences([input_text])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')
    prediction = model.predict(padded_sequence)
    # prediction is indexed as [sample][output unit]; only the first is used.
    return "Positive" if prediction[0][0] > 0.5 else "Negative"


In [None]:

# -------------------------------
# RUN TRAINING
# -------------------------------
# Driver: train on the JSON file, save the resulting model and tokenizer,
# then run one sample prediction and print the result.
if __name__ == "__main__":
    json_file = "training_data.json"  # Change this to the actual file path
    model, tokenizer = train_model(json_file)
    # Written to save_model's default paths (ai_model.h5 / tokenizer.json).
    save_model(model, tokenizer)

    # Test prediction
    test_input = "If a train is traveling at 60mph and it departs at 3:00 PM..."
    print("Prediction:", predict(test_input, model, tokenizer))