In [1]:
pip install transformers datasets torch scikit-learn


Collecting datasets
  Downloading datasets-3.3.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer

# Load the dataset
df = pd.read_csv("/content/sc.csv")

# Split into train and test sets
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'].tolist(), df['label'].tolist(), test_size=0.2)

# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the texts
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=128)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [3]:
import torch

class SatelliteDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Convert the labels into numeric format (assuming 5 orbit types: LEO, MEO, GEO, HEO, SSO)
label2id = {'LEO': 0, 'MEO': 1, 'GEO': 2, 'HEO': 3, 'SSO': 4}
train_labels = [label2id[label] for label in train_labels]
val_labels = [label2id[label] for label in val_labels]

train_dataset = SatelliteDataset(train_encodings, train_labels)
val_dataset = SatelliteDataset(val_encodings, val_labels)


In [4]:
from transformers import BertForSequenceClassification

# Load pre-trained BERT model for sequence classification
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
from transformers import Trainer, TrainingArguments

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    num_train_epochs=3,              # Number of training epochs
    per_device_train_batch_size=8,   # Batch size per device
    per_device_eval_batch_size=8,    # Evaluation batch size
    warmup_steps=500,                # Number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # Strength of weight decay
    logging_dir='./logs',            # Directory for storing logs
    logging_steps=10,
    evaluation_strategy="epoch"      # Evaluate at the end of every epoch
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)




In [6]:
trainer.train()




<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mudayvardhan-998[0m ([33mudayvardhan-998-vit[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Epoch,Training Loss,Validation Loss


Epoch,Training Loss,Validation Loss
1,0.0004,0.000263
2,0.0002,0.000104
3,0.0001,7.9e-05


TrainOutput(global_step=3000, training_loss=0.07179593597562052, metrics={'train_runtime': 8177.3303, 'train_samples_per_second': 2.935, 'train_steps_per_second': 0.367, 'total_flos': 160337618064000.0, 'train_loss': 0.07179593597562052, 'epoch': 3.0})

In [7]:
trainer.evaluate()


{'eval_loss': 7.856027514208108e-05,
 'eval_runtime': 136.5978,
 'eval_samples_per_second': 14.642,
 'eval_steps_per_second': 1.83,
 'epoch': 3.0}

In [8]:
model.save_pretrained('./fine_tuned_bert_model')
tokenizer.save_pretrained('./fine_tuned_bert_model')


('./fine_tuned_bert_model/tokenizer_config.json',
 './fine_tuned_bert_model/special_tokens_map.json',
 './fine_tuned_bert_model/vocab.txt',
 './fine_tuned_bert_model/added_tokens.json')

In [9]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load the fine-tuned model
model_path = "./fine_tuned_bert_model"
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)

# Move model to the appropriate device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # Set to evaluation mode


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [10]:
id2label = {0: 'LEO', 1: 'MEO', 2: 'GEO', 3: 'HEO', 4: 'SSO'}


In [11]:
def predict_orbit(text):
    # Tokenize input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Move input tensors to the correct device
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # Get model predictions
    with torch.no_grad():
        outputs = model(**inputs)

    # Get the predicted class
    predicted_class = torch.argmax(outputs.logits, dim=1).item()

    return id2label[predicted_class]


In [12]:
# Example test cases
test_cases = [
    "Providing global positioning and navigation services",
    "Weather monitoring for hurricanes and storms",
    "Military reconnaissance and surveillance imaging",
    "Tracking sea levels and monitoring ocean temperatures",
    "Satellite communication in polar regions"
]

# Run predictions
for text in test_cases:
    prediction = predict_orbit(text)
    print(f"Satellite Function: {text}\nPredicted Orbit: {prediction}\n")


Satellite Function: Providing global positioning and navigation services
Predicted Orbit: MEO

Satellite Function: Weather monitoring for hurricanes and storms
Predicted Orbit: GEO

Satellite Function: Military reconnaissance and surveillance imaging
Predicted Orbit: LEO

Satellite Function: Tracking sea levels and monitoring ocean temperatures
Predicted Orbit: LEO

Satellite Function: Satellite communication in polar regions
Predicted Orbit: HEO



In [13]:
import requests

# Fetch data from CelesTrak
url = "https://celestrak.org/NORAD/elements/gp.php?GROUP=active&FORMAT=json"
response = requests.get(url)
satellite_data = response.json()

# Print a sample
print(satellite_data[:2])  # Preview first 2 satellite entries


[{'OBJECT_NAME': 'CALSPHERE 1', 'OBJECT_ID': '1964-063C', 'EPOCH': '2025-02-14T23:18:50.699232', 'MEAN_MOTION': 13.75813916, 'ECCENTRICITY': 0.0024285, 'INCLINATION': 90.207, 'RA_OF_ASC_NODE': 60.6514, 'ARG_OF_PERICENTER': 331.3894, 'MEAN_ANOMALY': 99.1103, 'EPHEMERIS_TYPE': 0, 'CLASSIFICATION_TYPE': 'U', 'NORAD_CAT_ID': 900, 'ELEMENT_SET_NO': 999, 'REV_AT_EPOCH': 441, 'BSTAR': 0.001042, 'MEAN_MOTION_DOT': 1.019e-05, 'MEAN_MOTION_DDOT': 0}, {'OBJECT_NAME': 'CALSPHERE 2', 'OBJECT_ID': '1964-063E', 'EPOCH': '2025-02-14T19:15:06.034752', 'MEAN_MOTION': 13.5284705, 'ECCENTRICITY': 0.0017872, 'INCLINATION': 90.2207, 'RA_OF_ASC_NODE': 64.4216, 'ARG_OF_PERICENTER': 196.4873, 'MEAN_ANOMALY': 342.1793, 'EPHEMERIS_TYPE': 0, 'CLASSIFICATION_TYPE': 'U', 'NORAD_CAT_ID': 902, 'ELEMENT_SET_NO': 999, 'REV_AT_EPOCH': 79036, 'BSTAR': 6.0966e-05, 'MEAN_MOTION_DOT': 4.9e-07, 'MEAN_MOTION_DDOT': 0}]


In [16]:
def best_orbit_choice(satellite_function):
    orbits = ['LEO', 'MEO', 'GEO', 'HEO', 'SSO']
    orbit_congestion = {'LEO': 5000, 'MEO': 1200, 'GEO': 400, 'HEO': 150, 'SSO': 800}  # Example values

    # Use BERT model to predict orbit category
    predicted_orbit = predict_orbit(satellite_function)

    # Check congestion in that orbit
    if orbit_congestion[predicted_orbit] > 3000:  # Threshold for high congestion
        recommended_orbit = min(orbit_congestion, key=orbit_congestion.get)  # Find least crowded orbit
        return f"⚠️ {predicted_orbit} is highly congested. Suggested alternative: {recommended_orbit}"
    return f"✅ Best orbit: {predicted_orbit}"


In [15]:
test_function = "Global satellite communication for remote areas"
print(best_orbit_choice(test_function))


✅ Best orbit: HEO


In [17]:
orbit_altitudes = {
    'LEO': '160 - 2,000 km',
    'MEO': '2,000 - 35,786 km',
    'GEO': '35,786 km',
    'HEO': '> 35,786 km',
    'SSO': '600 - 800 km'
}


In [18]:
def best_orbit_choice(satellite_function):
    # Orbit congestion values (example data, replace with real-time data)
    orbit_congestion = {'LEO': 5000, 'MEO': 1200, 'GEO': 400, 'HEO': 150, 'SSO': 800}

    # Predict orbit using BERT model
    predicted_orbit = predict_orbit(satellite_function)

    # Check congestion in the predicted orbit
    if orbit_congestion[predicted_orbit] > 3000:  # Threshold for high congestion
        recommended_orbit = min(orbit_congestion, key=orbit_congestion.get)  # Find least crowded orbit
        return f"⚠️ {predicted_orbit} is highly congested. Suggested alternative: {recommended_orbit}\n🌍 Altitude: {orbit_altitudes[recommended_orbit]}"

    return f"✅ Best orbit: {predicted_orbit}\n🌍 Altitude: {orbit_altitudes[predicted_orbit]}"


In [19]:
test_function = "Global satellite communication for remote areas"
print(best_orbit_choice(test_function))


✅ Best orbit: HEO
🌍 Altitude: > 35,786 km


In [21]:
from transformers import TFBertForSequenceClassification
import tensorflow as tf

# Load the trained BERT model
model_path = "./fine_tuned_bert_model"
model = TFBertForSequenceClassification.from_pretrained(model_path)

# Save using the SavedModel format
model.save("bert_orbit_classifier", save_format="tf")
print("✅ Model saved as bert_orbit_classifier in SavedModel format")

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


✅ Model saved as bert_orbit_classifier in SavedModel format


In [26]:
model.save("bert_orbit_classifier", save_format="tf")
print("✅ Model saved in TensorFlow SavedModel format.")


✅ Model saved in TensorFlow SavedModel format.


In [28]:
model.save_weights("bert_orbit_classifier_weights.h5")
print("✅ Weights saved successfully.")


✅ Weights saved successfully.
