# Most forked Computational Neuroscience repositories
As of May 12 2022 at 4:41 pm CST

In [2]:
cnl = ["NeuromatchAcademy/course-content",
"cnrl/cns-project-template",
"translationalneuromodeling/tapas",
"ContextLab/computational-neuroscience",
"computational-neuroscience/Computational-Neuroscience-UW",
"neurolib-dev/neurolib",
"simetenn/uncertainpy",
"CompCogNeuro/sims",
"compmem/compsy",
"conorhoughton/COMS30127",
"ashumeow/Computational-NeuroScience",
"kuz/Computational-Neuroscience-Course",
"INCF/neuroshapes",
"CompCogNeuro/ed4",
"alisharifi2000/CS-SBU-ComputationalNeuroScience2021-projects",
"karnigili/Computational-Neuroscience",
"robclewley/compneuro",
"btel/python-in-neuroscience-tutorials",
"neurodebian/neurodebian",
"alfredcai/Coursera-Computational-NeuroScience",
"neurodata/brainlit",
"patrickmineault/xcorr-notebooks",
"ITNG/ModelingNeuralDynamics",
"CNS-OIST/a310_cns_2018",
"rougier/Neurosciences"]

In [1]:
import unicodedata
import re
import json
import os
import nltk
from nltk.tokenize.toktok import ToktokTokenizer
from nltk.corpus import stopwords
import pandas as pd

pd.options.display.max_colwidth = 100

## Acquire

In [3]:
data = pd.read_json('data.json')

In [4]:
data.shape

(125, 3)

## Prepare

In [5]:
def basic_clean(instr):
    '''
    Clean our data by making everything lowercase, normalize unicode characters, and removing unwanted characters
    '''
    # Lower case
    instr = instr.lower()
    # Normalize
    instr = unicodedata.normalize('NFKD' , instr).encode('ascii','ignore').decode('utf-8', 'ignore')
    # remove unwanted characters
    instr = re.sub(f"[^a-z0-9'\s]", '', instr)
    # Return the cleaned string
    return instr

In [6]:
data['clean'] = [basic_clean(readme) for readme in data.readme_contents]

In [7]:
def tokenize(instr):
    '''
    Tokenize the target string. We breakup words and puctuation into descrete units
    '''
    
    tokenizer = ToktokTokenizer()
    
    instr = tokenizer.tokenize(instr, return_str = True)
    
    return instr

In [8]:
data.clean = [tokenize(readme) for readme in data.clean]

In [9]:
# def stem(instr):
#     '''
#     '''
    
#     ps = nltk.porter.PorterStemmer()
    
#     stems = [ps.stem(word) for word in instr.split()]
    
#     instr = ' '.join(stems)
    
#     return instr

In [10]:
# data['stem'] = [stem(readme) for readme in data.clean]

In [11]:
def lemmatize(instr):
    '''
    
    '''
    
    wnl = nltk.stem.WordNetLemmatizer()
    
    lemmas = [wnl.lemmatize(word) for word in instr.split()]
    
    instr = ' '.join(lemmas)
    
    return instr

In [12]:
data.clean = [lemmatize(readme) for readme in data.clean]

In [13]:
def remove_stopwords(instr, extra_words = [], exclude_words= []):
    '''
    
    '''
    
    stopword_list = stopwords.words('english')
    
    if exclude_words: 
        for word in exclude_words:
            stopword_list.remove(word)
    
    if extra_words:
        for word in extra_words:
            stopword_list.append(word)
    
    words = instr.split()
    
    filtered_words = [w for w in words if w not in stopword_list]
    
    words_removed = ' '.join(filtered_words)
    
    return words_removed

In [14]:
data.clean = [remove_stopwords(readme) for readme in data.clean]

## Explore

In [15]:
data.head()

Unnamed: 0,repo,language,readme_contents,clean
0,ccxt/ccxt,JavaScript,# CCXT – CryptoCurrency eXchange Trading Library\n\n[![Build Status](https://travis-ci.com/ccxt/...,ccxt cryptocurrency exchange trading library build statushttpstraviscicomccxtccxtsvgbranchmaster...
1,openssl/openssl,C,Engines\n=======\n\nDeprecation Note\n----------------\n\nThe ENGINE API was introduced in OpenS...,engine deprecation note engine api wa introduced openssl version 096 low level interface adding ...
2,HelloZeroNet/ZeroNet,JavaScript,# ZeroNet [![Build Status](https://travis-ci.org/HelloZeroNet/ZeroNet.svg?branch=master)](https:...,zeronet build statushttpstravisciorghellozeronetzeronetsvgbranchmasterhttpstravisciorghellozeron...
3,freqtrade/freqtrade,Python,# ![freqtrade](https://raw.githubusercontent.com/freqtrade/freqtrade/develop/docs/assets/freqtra...,freqtradehttpsrawgithubusercontentcomfreqtradefreqtradedevelopdocsassetsfreqtradepoweredbysvg fr...
4,amark/gun,JavaScript,"<p id=""readme""><a href=""https://gun.eco/""><img width=""40%"" src=""https://cldup.com/TEy9yGh45l.svg...",p idreadmea hrefhttpsgunecoimg width40 srchttpscldupcomtey9ygh45lsvgaimg width50 alignright vspa...


In [16]:
labels = pd.concat([data.language.value_counts(), data.language.value_counts(normalize=True)], axis=1)
labels.columns = ['n', 'percent']
labels

Unnamed: 0,n,percent
Python,27,0.232759
JavaScript,24,0.206897
Jupyter Notebook,18,0.155172
HTML,5,0.043103
C,4,0.034483
C++,4,0.034483
Java,4,0.034483
Shell,3,0.025862
Ruby,3,0.025862
Go,3,0.025862


In [24]:
data.language.value_counts(dropna=False)

Python              27
JavaScript          24
Jupyter Notebook    18
NaN                  9
HTML                 5
C++                  4
Java                 4
C                    4
Shell                3
Go                   3
Ruby                 3
TeX                  3
MATLAB               2
TypeScript           2
Matlab               2
Groff                2
R                    1
CSS                  1
G-code               1
Kotlin               1
Mathematica          1
Assembly             1
PHP                  1
C#                   1
Swift                1
Objective-C          1
Name: language, dtype: int64

In [28]:
data[['repo','language']][data.language.isnull()]

Unnamed: 0,repo,language
51,asoplata/open-computational-neuroscience-resources,
53,eselkin/awesome-computational-neuroscience,
65,zifeo/EPFL,
73,neuronstar/spiking-neuron-models,
75,ossu/computer-science,
79,yrgo/awesome-educational-games,
87,ossu/math,
98,mikesprague/udacity-nanodegrees,
99,microsoft/Industry-Accelerator-Education,


In [19]:
for lang in data.language.unique():
    data[lang] = data.readme_contents[data.language == lang]

In [20]:
data.columns

Index([            'repo',         'language',  'readme_contents',
                  'clean',       'JavaScript',                'C',
                 'Python',              'C++',            'Swift',
                   'Java',               'C#',              'PHP',
                     'Go',       'TypeScript',         'Assembly',
            'Mathematica',             'HTML',              'CSS',
                  'Shell',           'Kotlin',           'G-code',
                   'Ruby',                'R', 'Jupyter Notebook',
                     None,           'MATLAB',              'TeX',
                 'Matlab',            'Groff',      'Objective-C'],
      dtype='object')

In [21]:
jupyter_notebook = ' '.join(data['Jupyter Notebook'].dropna())
python = ' '.join(data['Python'].dropna())
tex = ' '.join(data['TeX'].dropna())
go = ' '.join(data['Go'].dropna())
groff = ' '.join(data['Groff'].dropna())
html = ' '.join(data['HTML'].dropna())
shell = ' '.join(data['Shell'].dropna())

matlab = data['MATLAB'].dropna()
matlab = matlab.append(data['Matlab'].dropna())
matlab = ' '.join(matlab)

In [22]:
jupyter_notebook_freq = pd.Series(jupyter_notebook.split()).value_counts()
python_freq = pd.Series(python.split()).value_counts()
tex_freq = pd.Series(tex.split()).value_counts()
go_freq = pd.Series(go.split()).value_counts()
groff_freq = pd.Series(groff.split()).value_counts()
html_freq = pd.Series(html.split()).value_counts()
shell_freq = pd.Series(shell.split()).value_counts()
matlab_freq = pd.Series(matlab.split()).value_counts()

In [23]:
word_counts = (pd.concat([jupyter_notebook_freq,python_freq,tex_freq,go_freq,groff_freq,html_freq,shell_freq,matlab_freq],
                        axis = 1, sort = True)
               .set_axis(['jupyter_notebook_freq','python_freq','tex_freq','go_freq','groff_freq',
                          'html_freq','shell_freq','matlab_freq'], axis =1, inplace = False)
              .fillna(0)
              .apply(lambda s:s.astype(int)))
word_counts.sort_values(by='python_freq',ascending=False).head(10)

Unnamed: 0,jupyter_notebook_freq,python_freq,tex_freq,go_freq,groff_freq,html_freq,shell_freq,matlab_freq
the,192,803,9,200,8,164,33,160
and,146,389,4,107,8,83,42,58
of,91,381,4,74,14,63,23,82
to,83,372,8,75,6,124,31,30
|,246,345,0,0,0,0,35,0
a,48,285,5,50,0,72,26,24
is,51,235,1,25,2,30,8,42
in,80,226,1,67,0,52,21,44
-,37,215,0,12,0,51,94,40
for,68,164,3,63,2,34,21,56
