In [190]:
%%writefile classifier.py
import pandas as pd
import numpy as np

from pathlib import Path
import matplotlib.pyplot as plt
import os
import json

def pre_process(fid=None):
    fid = Path(fid)
    df = pd.read_csv('gender-classifier-DFE-791531.csv',  encoding='latin-1')
    girls_tweets = df[df['gender']=='female'][['text','gender']]
    boys_tweets =  df[df['gender']=='male'][['text','gender']]
    df = pd.concat([girls_tweets,boys_tweets])
    df['binary'] = pd.get_dummies(df.gender, prefix='gender')['gender_female']
    df = df[['text','gender']]
    plt.bar(['women','men'],height=np.bincount(df['binary']))

    train = df.sample(frac=0.8,random_state=43)
    test = df.drop(train.index)
    valid = train.sample(frac=0.1, random_state=43)
    train = train.drop(valid.index)

    train.to_csv('gender_text_train.csv', mode='w', index=False)
    test.to_csv('gender_text_test.csv', mode='w', index=False)
    valid.to_csv('gender_text_valid.csv', mode='w', index=False)
    
def train_hf_api(args):

    if args.login:
        sub_args = ('autonlp login --api-key  ')
        os.system(sub_args+args.api_key)
    
    if args.send:
        sub_args = f'autonlp upload '
        sub_args+= f' --project {args.project}'
        sub_args+= f' --split {args.split}'
        sub_args+= f' --col_mapping {args.col_mapping}'
        sub_args+= f' --files {args.files}'
        os.system(sub_args)
        
        
    if args.make:
        sub_args = f'autonlp create_project '
        sub_args+= f' --name {args.name}'
        sub_args+= f' --language {args.language}'
        sub_args+= f' --task {args.task}'
        sub_args+= f' --max_models {args.max_models}'
        os.system(sub_args)

    if args.train:
        sub_args = f'autonlp train '
        sub_args+= f' --project {args.project}'
        os.system(sub_args)

def inference(json_fid=None):
    with open(json_fid,'r') as fid:
        text_dict = json.load(json_fid)
        
    # load some model eg. model = torch.loadstatedict(model.pth)
    # for key in text_dict:
    #    prediction = model.predict(text_dict[key])
    #    text_dict[pred_gender]=prediction
    # json.dump(text_dict)
    # 
        


Overwriting classifier.py


In [198]:
%%writefile main.py

import classifier
import argparse


#hugging face api autonlp argugments (passed back to lower level in stack) 

parser = argparse.ArgumentParser(description='Training Calssifier ')


parser.add_argument('--input csv', default='gender-classifier-DFE-791531.csv', 
                    type=str, help='relative loacation of input csv for training')
parser.add_argument('--project', default='gender_class', type=str, help='poject name')


parser.add_argument('--split', default='train', type=str, help='dataset split')

parser.add_argument('--col_mapping',default=None, type=str, help='text:text, label:target')

parser.add_argument('--files',default='gender_text_train.csv', type=str, 
                    help='formated csv only 2 colls one for text one for target')

parser.add_argument('--api_key', default=None, type=str, help='api key from hugging_face account')
parser.add_argument('--resize', type=int, help='Resizes images by percentage as scalar')


parser.add_argument('--name', type=str, help='project name hugging face')

parser.add_argument('--language', type=str, help='lang in eg [en,sp,fr]')

parser.add_argument('--task', type=str, default='binary_classification',
                    help='Resizes images by percentage as scalar')
parser.add_argument('--max_models', type=int, default=2, help= 'nuber of trainable models')
parser.add_argument('--create_project', action='store_true',  help='create_new hf project')

#meta args-- directing sub process
parser.add_argument('--hugging_face', action='store_true', help='uses hugging face api to train model')
parser.add_argument('--send', action='store_true',  help='if entered will try to sen .csv')
parser.add_argument('--login', action='store_true',  help='if entered will try to sen .csv')
parser.add_argument('--make', action='store_true',  help='create_new hf project')
parser.add_argument('--train', action='store_true',  help='create_new hf project')

args = parser.parse_args()


if __name__=='__main__':
    
    if args.hugging_face:
        print(args.api_key)
        classifier.train_hf_api(args)
    
        print('Model trianing using autonlp (hugging face api)')
       

    
   
    
    
    
    

Overwriting main.py


In [201]:
!python3 main.py --hugging_face --login --api_key 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 


here
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
> [1mINFO    🗝 Successfully logged in as Frida[0m
> [1mINFO    🗝 Storing credentials in:  /Users/fridades/.autonlp[0m
Welcome to 🤗 AutoNLP! Start by creating a project: [91mautonlp create_project[0m
Model trianing using autonlp (hugging face api)


here
None
> [1mINFO    Uploading files for project: gender_class[0m
> [1mINFO    🗝 Retrieving credentials from config...[0m
> [1mINFO    ☁ Retrieving project 'gender_class' from AutoNLP...[0m
> [1mINFO    🔄 Refreshing project status...[0m
> [1mINFO    🔄 Refreshing uploaded files information...[0m
> [1mINFO    🔄 Refreshing models information...[0m
> [1mINFO    🔄 Refreshing cost information...[0m
> [1mINFO    ✅ Successfully loaded project: 'gender_class'![0m
> [1mINFO    Mapping: {'text': 'text', 'gender': 'target'}[0m
> [1mINFO    [1/1] 🔎 Validating gender_text_train.csv and column mapping...[0m
Using custom data configuration default-274ad4e6cc812bc7
Reusing dataset csv (/Users/fridades/.cache/huggingface/datasets/csv/default-274ad4e6cc812bc7/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff)
> [1mINFO    [1/1] 📦 Copying gender_text_train.csv to /Users/fridades/.huggingface/autonlp/projects/Frida/autonlp-data-gender_class/raw/gender_text_train.

In [202]:
import gdown 
import os
import zipfile
from pathlib import Path

In [18]:
https://drive.google.com/file/d/1TdkdylFnlI1efahselb-y5o8hI-9vZza/view

Unnamed: 0,text,gender
0,Not even looking forward to this drive to Attl...,female
1,"@CorsoJo yes! And at the hotel I'm looking at,...",female
2,It's great that more ppl love dogs &amp; cats ...,female
3,"face is boiling, hands and feet are freezing_Ù...",female
4,@TypicalGamer Hey my name is Travis and I woul...,male
...,...,...
1027,@brokentelephan sorry for ignoring you so much...,male
1028,Like the title one of her songs @elliegoulding...,male
1029,"It had been so long, the box actually had a th...",female
1030,@_blowNEmind I have a coupe I only needed my 2...,female
