In [10]:
from huggingface_hub import login, HfApi
import os
import sys
import dotenv
from pathlib import Path

In [6]:

## read variables from a .env file into the process environment, then pick up the token
dotenv.load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")

## login huggingface user
## a missing OR blank token (e.g. `HF_TOKEN=` in .env) cannot authenticate,
## so show the hint instead of handing an unusable value to login()
if HF_TOKEN is None or not HF_TOKEN.strip():
    print("Please set the HF_TOKEN environment variable. This is your Hugging Face token")
else:
    print("Logging in...")
    login(token=HF_TOKEN)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


Logging in...


In [8]:
## verify login: ask the Hub who the current credentials belong to
## `api` and `user_name` are reused by the repository-creation cell further down
api = HfApi()
user = api.whoami()
user_name = user['name']
print(f"Logged in as {user_name}")

Logged in as gaurangdave


In [14]:
## create a model repository on huggingface
## the same "<user>/<name>" id is used for both the model repo and the dataset repo
model_name = "mnist_digits_recognition"
repo_id = f"{user_name}/{model_name}"

## create a model repository
## exist_ok=True is passed so re-running this cell does not fail when the repo already exists
model_repo = api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)
print(f"Created repository: {model_repo}")

## create a data repository
## stored under its own name so `model_repo` keeps pointing at the model repository
dataset_repo = api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
print(f"Created repository: {dataset_repo}")

Created repository: https://huggingface.co/gaurangdave/mnist_digits_recognition
Created repository: https://huggingface.co/datasets/gaurangdave/mnist_digits_recognition


In [26]:
## upload all the models to the repository
models_root_dir = Path("..", "models")

def upload_models_in_dir(model_dir, root_dir=None, target_repo_id=None, hf_api=None):
    """Recursively upload every file under ``model_dir`` to the model repository.

    Each file is stored in the repository at its path relative to ``root_dir``,
    so the local folder layout is mirrored (e.g. ``ensemble/knn_model.pkl``).
    One "Uploaded ..." line is printed per file.

    Args:
        model_dir: ``pathlib.Path`` of the directory to walk.
        root_dir: directory that repository paths are computed relative to.
            Defaults to the notebook-level ``models_root_dir``.
        target_repo_id: repository id to upload to. Defaults to the
            notebook-level ``repo_id``.
        hf_api: object exposing ``upload_file(...)``. Defaults to a new ``HfApi()``.

    Raises:
        ValueError: raised by ``Path.relative_to`` when a file is not located
            under ``root_dir``.
    """
    if root_dir is None:
        root_dir = models_root_dir
    if target_repo_id is None:
        target_repo_id = repo_id
    if hf_api is None:
        ## build the client once for the whole walk instead of once per file
        hf_api = HfApi()

    for model in model_dir.iterdir():
        if model.is_dir():
            ## descend, keeping the same root so nested files retain their sub-folder prefix
            upload_models_in_dir(model, root_dir, target_repo_id, hf_api)
        else:
            ## as_posix() always yields "/" separators; str(path) would produce "\" on Windows
            path_in_repo = model.relative_to(root_dir).as_posix()
            hf_api.upload_file(path_or_fileobj=model, repo_id=target_repo_id, path_in_repo=path_in_repo, repo_type="model")
            print(f"Uploaded {model.name} to {path_in_repo}")

In [27]:

## start the recursive upload from the top of the models folder
upload_models_in_dir(model_dir=models_root_dir)

Uploaded random_forest_model.pkl to ensemble/random_forest_model.pkl


voting_classifier_model.pkl:   0%|          | 0.00/3.84G [00:00<?, ?B/s]

Uploaded voting_classifier_model.pkl to ensemble/voting_classifier_model.pkl


logistic_regression_model.pkl:   0%|          | 0.00/109k [00:00<?, ?B/s]

Uploaded logistic_regression_model.pkl to ensemble/logistic_regression_model.pkl


tuned_random_forest_model.pkl:   0%|          | 0.00/978M [00:00<?, ?B/s]

Uploaded tuned_random_forest_model.pkl to ensemble/tuned_random_forest_model.pkl


tuned_logistic_regression_model.pkl:   0%|          | 0.00/109k [00:00<?, ?B/s]

Uploaded tuned_logistic_regression_model.pkl to ensemble/tuned_logistic_regression_model.pkl


soft_voting_classifier_model.pkl:   0%|          | 0.00/3.84G [00:00<?, ?B/s]

Uploaded soft_voting_classifier_model.pkl to ensemble/soft_voting_classifier_model.pkl


gradient_boosting_model.pkl:   0%|          | 0.00/1.88M [00:00<?, ?B/s]

Uploaded gradient_boosting_model.pkl to ensemble/gradient_boosting_model.pkl


extra_trees_model.pkl:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

Uploaded extra_trees_model.pkl to ensemble/extra_trees_model.pkl


tuned_knn_model.pkl:   0%|          | 0.00/1.41G [00:00<?, ?B/s]

Uploaded tuned_knn_model.pkl to ensemble/tuned_knn_model.pkl


svc_model.pkl:   0%|          | 0.00/225M [00:00<?, ?B/s]

Uploaded svc_model.pkl to ensemble/svc_model.pkl


knn_model.pkl:   0%|          | 0.00/1.41G [00:00<?, ?B/s]

Uploaded knn_model.pkl to ensemble/knn_model.pkl


tuned_logistic_regression_v0.pkl:   0%|          | 0.00/125k [00:00<?, ?B/s]

Uploaded tuned_logistic_regression_v0.pkl to ensemble/tuned_logistic_regression_v0.pkl


tuned_svc_model.pkl:   0%|          | 0.00/203M [00:00<?, ?B/s]

Uploaded tuned_svc_model.pkl to ensemble/tuned_svc_model.pkl


logistic_regression_v0.joblib:   0%|          | 0.00/77.7k [00:00<?, ?B/s]

Uploaded logistic_regression_v0.joblib to logistic_regression_v0.joblib


random_forest_v0.joblib:   0%|          | 0.00/136M [00:00<?, ?B/s]

Uploaded random_forest_v0.joblib to random_forest_v0.joblib


logistic_regression_v2.joblib:   0%|          | 0.00/77.7k [00:00<?, ?B/s]

Uploaded logistic_regression_v2.joblib to logistic_regression_v2.joblib
Uploaded mnist_models_metrics.csv to mnist_models_metrics.csv


knn_v1.joblib:   0%|          | 0.00/352M [00:00<?, ?B/s]

Uploaded knn_v1.joblib to knn_v1.joblib


svc_prod_v1.joblib:   0%|          | 0.00/70.5M [00:00<?, ?B/s]

Uploaded svc_prod_v1.joblib to svc_prod_v1.joblib


svc_v3.joblib:   0%|          | 0.00/70.7M [00:00<?, ?B/s]

Uploaded svc_v3.joblib to svc_v3.joblib


svc_augmented_data_v1.joblib:   0%|          | 0.00/230M [00:00<?, ?B/s]

Uploaded svc_augmented_data_v1.joblib to svc_augmented_data_v1.joblib
Uploaded svc_prod_v3.joblib to svc_prod_v3.joblib


logistic_regression_v1_cv_results.joblib:   0%|          | 0.00/2.19k [00:00<?, ?B/s]

Uploaded logistic_regression_v1_cv_results.joblib to logistic_regression_v1_cv_results.joblib


svc_v1.joblib:   0%|          | 0.00/74.7M [00:00<?, ?B/s]

Uploaded svc_v1.joblib to svc_v1.joblib


logistic_regression_v1.joblib:   0%|          | 0.00/77.7k [00:00<?, ?B/s]

Uploaded logistic_regression_v1.joblib to logistic_regression_v1.joblib


default_logistic_regression_probabilites.csv:   0%|          | 0.00/12.2M [00:00<?, ?B/s]

Uploaded default_logistic_regression_probabilites.csv to default_logistic_regression_probabilites.csv


random_forest_v1.joblib:   0%|          | 0.00/272M [00:00<?, ?B/s]

Uploaded random_forest_v1.joblib to random_forest_v1.joblib


svc_v2.joblib:   0%|          | 0.00/70.5M [00:00<?, ?B/s]

Uploaded svc_v2.joblib to svc_v2.joblib


knn_v0.joblib:   0%|          | 0.00/352M [00:00<?, ?B/s]

Uploaded knn_v0.joblib to knn_v0.joblib
Uploaded svc_v0.joblib to svc_v0.joblib
Uploaded svc_prod_v2.joblib to svc_prod_v2.joblib


svc_prod.joblib:   0%|          | 0.00/70.5M [00:00<?, ?B/s]

Uploaded svc_prod.joblib to svc_prod.joblib


In [30]:
## upload all the datasets to the repository
data_root_dir = Path("..", "data")

def upload_data_in_dir(data_dir, root_dir=None, target_repo_id=None, hf_api=None):
    """Recursively upload every file under ``data_dir`` to the dataset repository.

    Each file is stored in the repository at its path relative to ``root_dir``,
    so the local folder layout is mirrored. One "Uploaded ..." line is printed
    per file.

    Args:
        data_dir: ``pathlib.Path`` of the directory to walk.
        root_dir: directory that repository paths are computed relative to.
            Defaults to the notebook-level ``data_root_dir``.
        target_repo_id: repository id to upload to. Defaults to the
            notebook-level ``repo_id``.
        hf_api: object exposing ``upload_file(...)``. Defaults to a new ``HfApi()``.

    Raises:
        ValueError: raised by ``Path.relative_to`` when a file is not located
            under ``root_dir``.
    """
    if root_dir is None:
        root_dir = data_root_dir
    if target_repo_id is None:
        target_repo_id = repo_id
    if hf_api is None:
        ## build the client once for the whole walk instead of once per file
        hf_api = HfApi()

    for dataset in data_dir.iterdir():
        if dataset.is_dir():
            ## descend, keeping the same root so nested files retain their sub-folder prefix
            upload_data_in_dir(dataset, root_dir, target_repo_id, hf_api)
        else:
            ## read path relative to the data directory
            ## as_posix() always yields "/" separators; str(path) would produce "\" on Windows
            path_in_repo = dataset.relative_to(root_dir).as_posix()
            hf_api.upload_file(path_or_fileobj=dataset, repo_id=target_repo_id, path_in_repo=path_in_repo, repo_type="dataset")
            print(f"Uploaded {dataset.name} to {path_in_repo}")

In [31]:
## start the recursive upload from the top of the data folder
upload_data_in_dir(data_dir=data_root_dir)

Uploaded user_prediction_request.csv to user_prediction_request.csv


mnist_train_set.csv:   0%|          | 0.00/102M [00:00<?, ?B/s]

Uploaded mnist_train_set.csv to mnist_train_set.csv


raw_mnist_data.csv:   0%|          | 0.00/128M [00:00<?, ?B/s]

Uploaded raw_mnist_data.csv to raw_mnist_data.csv


augmented_train_X.csv:   0%|          | 0.00/511M [00:00<?, ?B/s]

Uploaded augmented_train_X.csv to augmented_train_X.csv
Uploaded augmented_train_Y.csv to augmented_train_Y.csv
Uploaded user_input.csv to user_input.csv


augmented_ensemle_learning_mnist_data.csv:   0%|          | 0.00/409M [00:00<?, ?B/s]

Uploaded augmented_ensemle_learning_mnist_data.csv to augmented_ensemle_learning_mnist_data.csv


augmented_mnist_data.csv:   0%|          | 0.00/639M [00:00<?, ?B/s]

Uploaded augmented_mnist_data.csv to augmented_mnist_data.csv


mnist_test_set.csv:   0%|          | 0.00/25.6M [00:00<?, ?B/s]

Uploaded mnist_test_set.csv to mnist_test_set.csv
