### SQLite

In [1]:
import sqlite3
import pandas as pd
from IPython.display import display
from typing import Union, List, Dict, Any, Optional

def execute_sql(
    query: str, 
    db_path: str = 'innotrain.db', 
    return_type: str = 'df',
    params: Optional[Union[tuple, dict]] = None
) -> Union[pd.DataFrame, List[Dict[str, Any]], int, None]:
    """
    Execute a single SQL statement on a SQLite database with flexible return types.

    The statement is executed exactly once. Whether it produced a result set is
    decided from ``cursor.description`` rather than from the leading keyword, so
    CTEs (``WITH ...``) and statements preceded by a comment are handled like
    any other row-returning query.

    Args:
        query: SQL statement to execute.
        db_path: Path to the SQLite database file.
        return_type: 'df' for DataFrame, 'dict' for list of dicts,
                    'one' for single value, 'none' for no return.
        params: Positional (tuple) or named (dict) parameters for
                parameterized queries.

    Returns:
        For statements that produce a result set:
        - 'df': pandas DataFrame of all rows
        - 'dict': list of ``{column: value}`` dictionaries
        - 'one': first column of the first row, or None when there are no rows
        - anything else: None
        For statements without a result set (INSERT/UPDATE/DELETE/DDL):
        - 'one': ``cursor.lastrowid``
        - anything else: None

    Raises:
        sqlite3.Error: Re-raised after printing a message on database failures.
        Exception: Any other error is printed and re-raised unchanged.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row  # Enable column access by name

        # `with conn` only scopes the transaction (commit on success, rollback
        # on error); it does NOT close the connection - see `finally` below.
        with conn:
            cursor = conn.cursor()

            # Execute query with parameters if provided
            if params:
                cursor.execute(query, params)
            else:
                cursor.execute(query)

            # A non-None description means the statement yields rows.
            if cursor.description is not None:
                if return_type == 'df':
                    columns = [col[0] for col in cursor.description]
                    # sqlite3.Row is not a tuple subclass; normalise before
                    # handing the records to pandas.
                    rows = [tuple(row) for row in cursor.fetchall()]
                    return pd.DataFrame.from_records(
                        rows, columns=columns, coerce_float=True
                    )
                if return_type == 'dict':
                    return [dict(row) for row in cursor.fetchall()]
                if return_type == 'one':
                    first_row = cursor.fetchone()
                    return first_row[0] if first_row else None
                return None

            # No result set (INSERT/UPDATE/DELETE/DDL); the commit happens
            # when the `with conn` block exits.
            if return_type == 'one':
                return cursor.lastrowid
            return None

    except sqlite3.Error as e:
        print(f"Database error: {e}")
        raise
    except Exception as e:
        print(f"Error in execute_sql: {e}")
        raise
    finally:
        # Release the file handle even when the statement failed.
        if conn is not None:
            conn.close()

In [3]:
# List the name of every table recorded in the sqlite_master catalog.
query = "SELECT name FROM sqlite_master WHERE type='table';"
execute_sql(query)

Unnamed: 0,name
0,aerich
1,sqlite_sequence
2,training_job
3,training_iteration
4,epoch_train
5,eval


In [2]:
# Load the whole training_job table and show its last rows.
query = """select * from training_job"""
dfjob = execute_sql(query)
dfjob.tail()

Unnamed: 0,uuid,created_at,started_at,completed_at,project_id,training_run_id,project_yaml_config,training_request,machine_config,status,time_taken,job_metadata
0,1c27fdec-13b5-4cc4-a476-335822a0fe5c,2026-01-24T17:08:14+05:30,2026-01-24T17:16:29+05:30,,7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd,c1aa9a60-0944-4261-aaea-517dafcd74bc,"{""project_name"": ""spam local"", ""project_id"": ""...","{""success"": true, ""message"": ""Training run sta...","{""instance_id"": ""e0589d3cf69043d7baf8b568a06aa...",RUNNING,,


In [6]:
# Load the whole training_iteration table and show its last rows.
query = """select * from training_iteration"""
dfiter = execute_sql(query)
dfiter.tail()

Unnamed: 0,uuid,created_at,completed_at,time_taken,training_job_uuid,iteration_number,step_type,step_config,iteration_metadata
7,15a9eae6-3bce-44e4-a2c3-22d26ac33597,2026-01-24T17:18:18+05:30,2026-01-24T17:18:47+05:30,0.48,1c27fdec-13b5-4cc4-a476-335822a0fe5c,2,ITERATION,"{""project_name"": ""spam local"", ""no_iterations""...",
8,95b3642e-39fc-460d-a1d3-8effda84991f,2026-01-24T17:18:18+05:30,2026-01-24T17:18:27+05:30,0.14,1c27fdec-13b5-4cc4-a476-335822a0fe5c,2,TRAJECTORY,"{""project_name"": ""spam local"", ""trajectory_nam...",
9,56d27061-595f-4395-9b3e-55eab9a71bc5,2026-01-24T17:18:27+05:30,2026-01-24T17:18:35+05:30,0.13,1c27fdec-13b5-4cc4-a476-335822a0fe5c,2,TRAINING,"{""project_name"": ""spam local"", ""trajectory_nam...",
10,e788e324-cca7-4513-84ec-563ecbbcecde,2026-01-24T17:18:35+05:30,2026-01-24T17:18:40+05:30,0.08,1c27fdec-13b5-4cc4-a476-335822a0fe5c,2,EVALUATION,"{""project_name"": ""spam local"", ""training_name""...",
11,818e7a65-01b2-4e4d-9c69-efc7abce1e03,2026-01-24T17:18:40+05:30,2026-01-24T17:18:47+05:30,0.12,1c27fdec-13b5-4cc4-a476-335822a0fe5c,2,EVALUATION,"{""project_name"": ""spam local"", ""training_name""...",


In [8]:
# (row count, column count) of the training_iteration frame.
dfiter.shape

(12, 9)

In [11]:
# step_config of the first row whose step_type is EVALUATION.
dfiter.loc[dfiter['step_type'] == 'EVALUATION', 'step_config'].iloc[0]

'{"project_name": "spam local", "training_name": "run_1", "dataset": "cv", "reward": "classify_text", "topk_eval": 1, "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "max_new_tokens": 2, "eval_metric": "accuracy"}'

In [7]:
# step_config of the fourth row (position 3) of the iterations frame.
dfiter['step_config'].iloc[3]

'{"project_name": "spam local", "trajectory_name": "run_1", "trajectory_count": 2, "ref_model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "max_new_tokens": 2, "reward": "classify_text", "prev_training_name": null, "prev_model_epoch": null, "topk_trajectory": 50}'

In [7]:
# Load the whole epoch_train table and display it in full.
query = """select * from epoch_train"""
dfepoch = execute_sql(query)
dfepoch

Unnamed: 0,uuid,created_at,completed_at,time_taken,epoch_metadata,iteration_uuid,iteration_number,epoch_number,model_path,optimizer_path,metrics
0,32097af5-e084-48c3-9a1e-0796e45c2f62,2026-01-24T00:58:46+05:30,2026-01-24T00:58:47+05:30,0.016667,,fc1f1858-61bf-4979-8e8f-03053ac21fb7,1,1,output/spam local/training/run_1/models/model_...,output/spam local/training/run_1/optimizer/opt...,"{""avg_loss"": 0.0}"
1,27a90730-9c47-4dd1-90c4-ddd84318c19e,2026-01-24T00:59:13+05:30,2026-01-24T00:59:14+05:30,0.016667,,9e37caef-1f4d-44bb-96ef-01ad982c2623,2,1,output/spam local/training/run_2/models/model_...,output/spam local/training/run_2/optimizer/opt...,"{""avg_loss"": 0.0}"
2,09682b07-d74d-411a-b225-724c83dc5b07,2026-01-24T01:52:33+05:30,2026-01-24T01:52:36+05:30,0.05,,17435e91-3e20-4b0b-b82f-83de83ade5a5,1,1,output/spam local/training/run_1/models/model_...,output/spam local/training/run_1/optimizer/opt...,"{""avg_loss"": 0.0}"
3,36b74c26-8718-4db5-aec5-536f553cafbf,2026-01-24T01:53:27+05:30,2026-01-24T01:53:30+05:30,0.05,,b42bbeb4-e30a-43d5-baff-cc144c878d81,2,1,output/spam local/training/run_2/models/model_...,output/spam local/training/run_2/optimizer/opt...,"{""avg_loss"": 0.0}"


In [3]:
# Load the whole eval table and show its first rows.
query = """select * from eval"""
dfeval = execute_sql(query)
dfeval.head()

Unnamed: 0,uuid,created_at,completed_at,time_taken,iteration_uuid,model_id,dataset,config,metrics,eval_data_path,eval_metadata
0,487c7935-5ee1-458c-9bbb-ca558955cc5d,2026-01-24T17:18:11+05:30,2026-01-24T17:18:13+05:30,0.03,92b6664e-ae4b-4623-b56b-d94ceb1e94d6,iteration_1_epoch_1,cv,"{""project_name"": ""spam local"", ""training_name""...","{""think_reward"": {""count"": 0.0, ""total"": 2, ""p...",output/spam local/training/run_1/eval/metrics_...,"{""model_path"": ""output/spam local/training/run..."
1,cef03927-8441-4289-a272-9a203c8c0b70,2026-01-24T17:18:15+05:30,2026-01-24T17:18:17+05:30,0.03,49ea0edc-e1ab-49fe-95a9-e1830a5c0dad,iteration_1_epoch_1,train,"{""project_name"": ""spam local"", ""training_name""...","{""think_reward"": {""count"": 0.0, ""total"": 2, ""p...",output/spam local/training/run_1/eval/metrics_...,"{""model_path"": ""output/spam local/training/run..."
2,cae6cef5-1ea1-439c-a83f-2eb4773f20d4,2026-01-24T17:18:37+05:30,2026-01-24T17:18:39+05:30,0.04,e788e324-cca7-4513-84ec-563ecbbcecde,iteration_2_epoch_1,cv,"{""project_name"": ""spam local"", ""training_name""...","{""think_reward"": {""count"": 0.0, ""total"": 2, ""p...",output/spam local/training/run_2/eval/metrics_...,"{""model_path"": ""output/spam local/training/run..."
3,34509a72-8262-4792-a280-e59da9f2dc68,2026-01-24T17:18:42+05:30,2026-01-24T17:18:46+05:30,0.07,818e7a65-01b2-4e4d-9c69-efc7abce1e03,iteration_2_epoch_1,train,"{""project_name"": ""spam local"", ""training_name""...","{""think_reward"": {""count"": 0.0, ""total"": 2, ""p...",output/spam local/training/run_2/eval/metrics_...,"{""model_path"": ""output/spam local/training/run..."


In [4]:
# Raw metrics string stored on the first eval row.
dfeval['metrics'].iloc[0]

'{"think_reward": {"count": 0.0, "total": 2, "percentage": 0.0}, "answer_reward": {"count": 0.0, "total": 2, "percentage": 0.0}, "format_reward": {"count": 0.0, "total": 2, "percentage": 0.0}, "thinking_length_reward": {"count": 0.0, "total": 2, "percentage": 0.0}, "total_reward": {"count": 0.0, "total": 2, "percentage": 0.0}}'

In [11]:
# config of the second eval row whose dataset is 'train'.
dfeval.loc[dfeval['dataset'] == 'train', 'config'].iloc[1]

'{"project_name": "spam local", "training_name": "run_2", "model_epoch": 1, "dataset": "train", "reward": "classify_text", "topk_eval": 1, "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "max_new_tokens": 2}'

### MinIO

In [1]:
from scripts.storage_client import StorageClient

In [2]:
# Build the storage client with its constructor defaults.
minio_storage = StorageClient()

In [3]:
# Show the buckets this client can see.
minio_storage.list_buckets()

[{'name': 'innotone-media',
  'creation_date': datetime.datetime(2025, 9, 19, 18, 24, 15, 289000, tzinfo=datetime.timezone.utc)}]

In [5]:
# Bucket used by the storage cells that follow.
bucket_name = 'innotone-media'

In [14]:
# Walk every object under media/projects/ through the underlying client
# (`minio_storage.client`) rather than the wrapper's own list_objects.
# NOTE(review): `obj` stays bound after the loop and is inspected by later cells.
bucket_name = 'innotone-media'
for obj in minio_storage.client.list_objects(bucket_name,recursive=True,prefix='media/projects/'):
    print(obj)

Object(bucket_name='innotone-media', object_name='media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/eval/spam_cv_test.csv', last_modified=datetime.datetime(2025, 9, 23, 19, 17, 28, 602000, tzinfo=datetime.timezone.utc), etag='d65d754b39890b09bfc0838d95136530', size=325, metadata={}, version_id=None, is_latest=None, storage_class='STANDARD', owner_id=None, owner_name=None, content_type=None, is_delete_marker=False, tags=None, is_dir=False)
Object(bucket_name='innotone-media', object_name='media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/train/spam_train_test.csv', last_modified=datetime.datetime(2025, 9, 23, 19, 17, 6, 424000, tzinfo=datetime.timezone.utc), etag='2f6b95bea94f24f9ab591d0dd751f6a3', size=557, metadata={}, version_id=None, is_latest=None, storage_class='STANDARD', owner_id=None, owner_name=None, content_type=None, is_delete_marker=False, tags=None, is_dir=False)
Object(bucket_name='innotone-media', object_name='media/projects/86359442-8cfe-4a9b-b997-7da126b8c72c/eval/

In [None]:

# NOTE(review): the recorded output shows this wrapper call printing
# "'Object' object has no attribute 'name'" and returning [] - the fault is
# presumably inside StorageClient.list_objects; confirm and fix there.
minio_storage.list_objects(bucket_name)

Error listing objects: 'Object' object has no attribute 'name'


[]

In [11]:
# Download media/test_upload.txt to a local file named test_upload.txt.
minio_storage.download_file(bucket_name=bucket_name,object_name='media/test_upload.txt',file_path='test_upload.txt')

True

In [15]:
# Download the same object again, this time to test_download.txt.
minio_storage.download_file(bucket_name=bucket_name,object_name='media/test_upload.txt',file_path='test_download.txt')

True

In [17]:
# Upload the local test_download.txt as media/test_download.txt.
minio_storage.upload_file(bucket_name=bucket_name,object_name="media/test_download.txt",file_path="test_download.txt")

True

In [7]:
# Request a presigned URL for the uploaded object with expires_seconds=3600.
# NOTE(review): the recorded URL embeds the access key id and a signature;
# clear this cell's output before sharing the notebook.
minio_storage.get_presigned_url(bucket_name=bucket_name,object_name='media/test_download.txt',expires_seconds=3600)

'http://localhost:10000/innotone-media/media/test_download.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=minioadmin%2F20251013%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20251013T164747Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=4f15fdcd1b751288e15e5c2ffa35d4c08a6f01f78d02b39f0f09ce9b7158a93e'

In [8]:
# Remove the object uploaded above so the bucket is left as it was found.
minio_storage.delete_file(bucket_name=bucket_name ,object_name='media/test_download.txt')

True

### Lambda AI

In [1]:
# Project-local Lambda client, constructed with its defaults.
from scripts.lambda_client import LambdaClient

client = LambdaClient()

In [2]:
# Query available instances; the recorded result is a single dict with
# 'name' and 'region' keys, which the launch cell reads.
gpu_config = client.list_available_instances()
gpu_config

{'name': 'gpu_1x_a10', 'region': 'us-east-1'}

In [3]:
# Launch an instance of the type/region reported by list_available_instances.
instance_type_name = gpu_config['name']
region_name = gpu_config['region']
instance_config = client.launch_instance(instance_type_name,region_name)

In [4]:
# Keep the launched instance's id and IP for the terminate and SSH cells.
instance_id = instance_config['id']
instance_ip = instance_config['ip']
instance_id, instance_ip

('559ef2591e014ea8a65a799635d6479d', '129.213.138.192')

In [45]:
# NOTE(review): this cell sits before the SSH section but its execution count
# (45) is later than every SSH cell's. Under Restart & Run All it would
# terminate the instance before the cells below connect to it - consider
# moving it to the end of the notebook.
client.terminate_instance(instance_id)

True

### SSH

In [10]:
import paramiko

In [11]:
# Record which paramiko version this environment provides.
paramiko.__version__

'4.0.0'

In [12]:
# Project-local SSH helper, pointed at the launched instance's IP.
from scripts.ssh_executor import SshExecutor

se = SshExecutor(ip=instance_ip)

In [14]:
# Smoke test: run `ls -la` remotely; the recorded result is a CommandResult
# carrying stdout, stderr, return_code, success and duration.
se.execute_command("ls -la")

CommandResult(command='ls -la', stdout='total 44\r\ndrwxr-x--- 8 ubuntu ubuntu 4096 Oct 16 15:28 .\r\ndrwxr-xr-x 3 root   root   4096 Jul  7 03:53 ..\r\n-rw-r--r-- 1 ubuntu ubuntu  220 Jan  6  2022 .bash_logout\r\n-rw-r--r-- 1 ubuntu ubuntu 3771 Jan  6  2022 .bashrc\r\ndrwx------ 2 ubuntu ubuntu 4096 Oct 16 15:28 .cache\r\ndrwxr-xr-x 3 ubuntu ubuntu 4096 Oct 16 15:28 .config\r\ndrwxrwxr-x 2 ubuntu ubuntu 4096 Jul  7 04:05 .ipython\r\ndrwxr-xr-x 2 ubuntu ubuntu 4096 Oct 16 15:19 .jupyter\r\ndrwxr-xr-x 6 ubuntu ubuntu 4096 Oct 16 15:19 .local\r\n-rw-r--r-- 1 ubuntu ubuntu  807 Jan  6  2022 .profile\r\ndrwx------ 2 ubuntu ubuntu 4096 Oct 16 15:19 .ssh', stderr='', return_code=0, success=True, duration=0.9427509307861328)

In [16]:
# Project-local helper that moves files between object storage and a server.
from scripts.s3_to_server_transfer import S3ToServerTransfer
transfer = S3ToServerTransfer()


In [43]:
# Copy everything under one project prefix from the bucket to `server_path`
# on the instance.
# NOTE(review): the recorded output targets 192.222.59.217, which differs from
# the instance_ip displayed earlier - the saved outputs appear to come from
# different sessions.
s3_bucket = 'innotone-media'
s3_prefix ='media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/7b1be4c4-084d-46d7-948d-12b04b26b049'
server_ip = instance_config['ip']
server_path = 'test'
transfer.transfer_files_to_server(s3_bucket, s3_prefix, server_ip, server_path)

Found 1 file(s) to transfer
Transferring media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/7b1be4c4-084d-46d7-948d-12b04b26b049/config.yaml to 192.222.59.217:test/config.yaml
Successfully transferred config.yaml
Cleaned up temporary directory: /var/folders/zj/2cvxzc_d59s48hbf1kttzpqr0000gn/T/s3_to_server_5g_90z9w


True

In [44]:
# Reverse direction: push the server's `test` directory to the
# innotone-training bucket under the `test` prefix.
# NOTE(review): the recorded run failed with "'CommandResult' object has no
# attribute 'strip'" and returned False - presumably the transfer helper
# treats a CommandResult as a string; confirm in scripts.s3_to_server_transfer.
server_ip = instance_config['ip']
server_path = 'test'
s3_bucket = 'innotone-training'
s3_prefix = 'test'

transfer.transfer_files_to_s3(server_ip, server_path, s3_bucket, s3_prefix,recursive=True)

Transfer failed: 'CommandResult' object has no attribute 'strip'


False

In [38]:
# `os` is imported here because this cell comes before the notebook's
# standalone `import os` cell; without it a top-to-bottom run raises NameError.
import os

# Remote path of config.yaml inside `server_path`.
os.path.join(server_path,'config.yaml')

'~/test/config.yaml'

In [35]:
import os

In [37]:
# Check that basename strips the object-key prefix and keeps only the file name.
os.path.basename("media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/7b1be4c4-084d-46d7-948d-12b04b26b049/config.yaml")

'config.yaml'

In [21]:
# Second storage client, this time with storage_type="minio" passed explicitly.
# NOTE(review): StorageClient is already imported in the MinIO section above.
from scripts.storage_client import StorageClient
sc = StorageClient(storage_type="minio")

In [None]:
# NOTE(review): the recorded output is "TypeError: list_objects() got an
# unexpected keyword argument 'recursive'" although no `recursive` argument is
# passed here - presumably the wrapper forwards it internally; confirm in
# StorageClient.list_objects.
sc.list_objects(bucket_name=s3_bucket,prefix=s3_prefix)

TypeError: list_objects() got an unexpected keyword argument 'recursive'

In [30]:
# Workaround: list recursively through the underlying client and print each
# object's key.
result = sc.client.list_objects(s3_bucket,s3_prefix,recursive=True)
for obj in result:
    print(obj.object_name)

media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/7b1be4c4-084d-46d7-948d-12b04b26b049/config.yaml


In [33]:
# Inspect the loop variable left over from a listing loop; which object is
# shown depends on which listing cell ran last.
obj

Object(bucket_name='innotone-media', object_name='media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/train/spam_train_test.csv', last_modified=datetime.datetime(2025, 9, 23, 19, 17, 6, 424000, tzinfo=datetime.timezone.utc), etag='2f6b95bea94f24f9ab591d0dd751f6a3', size=557, metadata={}, version_id=None, is_latest=None, storage_class='STANDARD', owner_id=None, owner_name=None, content_type=None, is_delete_marker=False, tags=None, is_dir=False)

In [32]:
# Widen the prefix to the whole project and list its object keys recursively.
s3_prefix ='media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/'
result = sc.client.list_objects(s3_bucket,s3_prefix,recursive=True)
for obj in result:
    print(obj.object_name)

media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/7b1be4c4-084d-46d7-948d-12b04b26b049/config.yaml
media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/eval/spam_cv_test.csv
media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/train/spam_train_test.csv


In [26]:
# Inspect the leftover loop variable again.
# NOTE(review): the recorded value has is_dir=True, which none of the visible
# listing cells would leave behind - it presumably came from a cell that no
# longer exists.
obj

Object(bucket_name='innotone-media', object_name='media/projects/7bce834a-bd56-4fa6-89d6-dcd2acb0b4cd/7b1be4c4-084d-46d7-948d-12b04b26b049/', last_modified=None, etag=None, size=None, metadata=None, version_id=None, is_latest=None, storage_class=None, owner_id=None, owner_name=None, content_type=None, is_delete_marker=False, tags=None, is_dir=True)

In [19]:
# Upload scripts/config.yaml, passing 'config.yaml' as the second argument
# (presumably the remote path - confirm against SshExecutor.upload_file).
se.upload_file('scripts/config.yaml','config.yaml')

True

In [None]:
# TODO(review): unfinished cell - it held only the bare name `transf`, which
# no visible cell defines and which would raise NameError on Run All.
# Complete the intended call or delete the cell.

In [25]:
yaml_path = 'projects_yaml/config.yaml'
def load_yaml(yaml_path):
    """Parse a YAML file and return the resulting Python object.

    Args:
        yaml_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.
    """
    # Imported inside the function, as in the original cell.
    import yaml
    # Pin the encoding instead of inheriting the platform default, so the file
    # is read the same way on every OS.
    with open(yaml_path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)

In [26]:
# Parse the project config; the upload cell reads file paths from it.
yaml_config = load_yaml(yaml_path)

In [27]:
# Upload the train file, the CV file and the YAML config itself, passing the
# same path as both arguments of upload_file.
file_paths = [yaml_config['train_file_path'], yaml_config['cv_file_path'], yaml_path]
for file_path in file_paths:
    se.upload_file(file_path,file_path)

In [23]:
# NOTE(review): the next cell uses `se` again after this disconnect. Confirm
# SshExecutor reconnects on demand, or move this cell to the end.
se.disconnect()

In [28]:
# Upload the job script and run it on the remote host. The same `script_path`
# feeds both calls so the uploaded and executed file cannot drift apart.
# NOTE(review): upload_file gets a single argument here while other cells pass
# two - presumably the remote path defaults to the local one; confirm.
script_path = 'run_docker_job.sh'
se.upload_file(script_path)
se.execute_script(script_path)




In [None]:

# Close the SSH session once the remote job has been started.
se.disconnect()