## How to get started:

1. Download / Clone the [Repo](https://kirr.co/0bu8v5)
2. Activate a virtual environment `python3 -m venv .`
3. Run `pip install -r requirements.txt`
4. Run `jupyter notebook`

> If you get a import error related to django, just do steps 2-4 in a new terminal / powershell.

In [1]:
# !pip freeze

In [2]:
from django_setup import init_django
init_django('autogen')

In [3]:
from sqlalchemy import create_engine
sql_engine = create_engine("sqlite:///lite.sqlite3")

In [4]:
import pathlib
import pandas as pd

In [5]:
ROOT_DIR = pathlib.Path().resolve()
print(ROOT_DIR)
NBS_DIR = ROOT_DIR / "nbs" 
DATA_DIR = ROOT_DIR / "data"

/Users/cfe/Dev/autogen_dj


In [6]:
topics_csv = DATA_DIR / 'programming_topics.csv' # scrape stackoverflow
topics_csv.exists()

True

In [7]:
df = pd.read_csv(topics_csv)

In [8]:
df.head()

Unnamed: 0,tag,count
0,javascript,1910720
1,java,1614091
2,c#,1364258
3,php,1320539
4,python,1299327


In [9]:
df['percent'] = df['count'] / df['count'].sum()
df['percent_human'] = (df['count'] / df['count'].sum()) * 100
df.head()

Unnamed: 0,tag,count,percent,percent_human
0,javascript,1910720,0.035787,3.578688
1,java,1614091,0.030231,3.023116
2,c#,1364258,0.025552,2.55519
3,php,1320539,0.024733,2.473307
4,python,1299327,0.024336,2.433578


In [10]:
from sqlalchemy.types import DECIMAL, String

# String -> models.CharField()
# DECIMAL -> models.DecimalField()
# Default dtype -> models.TextField()


sql_dtypes = {
    'tag': String,
    'percent': DECIMAL,
    'percent_human': String,
}


In [11]:
df.to_sql(name='topics', if_exists='replace', con=sql_engine, dtype=sql_dtypes)

In [12]:
!python manage.py inspectdb --database lite 

# This is an auto-generated Django model module.
# You'll have to do the following manually to clean this up:
#   * Rearrange models' order
#   * Make sure each model has one field with primary_key=True
#   * Make sure each ForeignKey and OneToOneField has `on_delete` set to the desired behavior
#   * Remove `managed = False` lines if you wish to allow Django to create, modify, and delete the table
# Feel free to rename the models, but don't rename db_table values or field names.
from django.db import models


class Topics(models.Model):
    index = models.BigIntegerField(blank=True, null=True)
    tag = models.CharField(blank=True, null=True)
    count = models.BigIntegerField(blank=True, null=True)
    percent = models.DecimalField(max_digits=10, decimal_places=5, blank=True, null=True)  # max_digits and decimal_places have been guessed, as this database handles decimal fields as float
    percent_human = models.CharField(blank=True, null=True)

    class Meta:
    

In [13]:
!python manage.py makemigrations
!python manage.py migrate

No changes detected
[36;1mOperations to perform:[0m
[1m  Apply all migrations: [0madmin, auth, contenttypes, sessions, topics
[36;1mRunning migrations:[0m
  No migrations to apply.


In [14]:
from topics.models import Topic

In [15]:
columns = ['tag', 'count', 'percent', 'percent_human']
new_data = df[columns].to_dict('records')
new_data

[{'tag': 'javascript',
  'count': 1910720,
  'percent': 0.03578687532754425,
  'percent_human': 3.578687532754425},
 {'tag': 'java',
  'count': 1614091,
  'percent': 0.030231155472445585,
  'percent_human': 3.0231155472445583},
 {'tag': 'c#',
  'count': 1364258,
  'percent': 0.025551902403599096,
  'percent_human': 2.55519024035991},
 {'tag': 'php',
  'count': 1320539,
  'percent': 0.024733066361455346,
  'percent_human': 2.4733066361455345},
 {'tag': 'python',
  'count': 1299327,
  'percent': 0.024335775706912623,
  'percent_human': 2.4335775706912623},
 {'tag': 'android',
  'count': 1237704,
  'percent': 0.02318160627428552,
  'percent_human': 2.318160627428552},
 {'tag': 'jquery',
  'count': 971607,
  'percent': 0.018197736233655,
  'percent_human': 1.8197736233655},
 {'tag': 'html',
  'count': 863953,
  'percent': 0.01618142810032754,
  'percent_human': 1.618142810032754},
 {'tag': 'c++',
  'count': 644907,
  'percent': 0.012078800874466474,
  'percent_human': 1.2078800874466473},


In [16]:
qs = Topic.objects.all()
if qs.count() != 0:
    qs.delete()
    
new_entries = []
for d in new_data:
    new_obj = Topic(**d)
    new_entries.append(new_obj)

Topic.objects.bulk_create(new_entries)

[<Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic: Topic object (None)>,
 <Topic:

In [17]:
qs = Topic.objects.all()

In [18]:
qs.count() == df.shape[0]

True

In [19]:
qs.first().percent_human

'3.578687532754425'