# Load networks

In [1]:
import os
import sys
import wikinet as wiki
import numpy as np
import pandas as pd
import networkx as nx
import scipy as sp

In [2]:
import plotly.express as px
import plotly.graph_objects as go

In [3]:
# Complete list of topic names. Only `topics` (defined right after) is
# iterated by the loading and sampling cells visible in this notebook.
all_topics = [
    'anatomy',
    'biochemistry',
    'cognitive science',
    'evolutionary biology',
    'genetics',
    'immunology',
    'molecular biology',
    'chemistry',
    'biophysics',
    'energy',
    'optics',
    'earth science',
    'geology',
    'meteorology',
    'philosophy of language',
    'philosophy of law',
    'philosophy of mind',
    'philosophy of science',
    'economics',
    'accounting',
    'education',
    'linguistics',
    'law',
    'psychology',
    'sociology',
    'electronics',
    'software engineering',
    'robotics',
    'calculus',
    'geometry',
    'abstract algebra',
    'Boolean algebra',
    'commutative algebra',
    'group theory',
    'linear algebra',
    'number theory',
    'dynamical systems and differential equations',
]

# Working subset: every entry also appears in `all_topics`.
topics = [
    'biochemistry',
    'cognitive science',
    'evolutionary biology',
    'genetics',
    'molecular biology',
    'energy',
    'optics',
    'philosophy of language',
    'philosophy of law',
    'philosophy of science',
    'linguistics',
    'software engineering',
]

In [4]:
# Absolute directory containing the per-topic "<topic>.pickle" graph files.
path_saved = os.path.join(*(
    '/', 'Users', 'harangju', 'Developer', 'data', 'wikinet', 'pickles',
))

In [5]:
# Build one wiki.Net per selected topic from "<path_saved>/<topic>.pickle",
# echoing each topic name on a single shared line as it is processed.
networks = dict()
for topic in topics:
    print(topic, end=' ')
    networks[topic] = wiki.Net(path_graph=os.path.join(path_saved, f"{topic}.pickle"))

biochemistry cognitive science evolutionary biology genetics molecular biology energy optics philosophy of language philosophy of law philosophy of science linguistics software engineering 

# Node-years

In [6]:
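# Disabled cell: dumps every node's 'year' attribute to "<topic>.txt", one
# "<year><tab(s)><node>" line per node (two tabs when the year has fewer than
# four characters, so the node column stays aligned).
# NOTE: the year-phrase sampling cell further down writes to the same
# "<topic>.txt" file names, so re-enabling this would make the two cells
# overwrite each other's output.
# for topic in topics:
#     with open(f"{topic}.txt", 'w') as f:
#         attributes = nx.get_node_attributes(networks[topic].graph, 'year')
#         for k, v in attributes.items():
#             if len(str(v))<4:
#                 f.write(f"{v}\t\t{k}\n")
#             else:
#                 f.write(f"{v}\t{k}\n")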

# Year-phrases

In [7]:
import wikinet as wiki

In [8]:
# Directory and file names of the 2019-08-01 English Wikipedia multistream
# dump: the compressed XML archive and its companion index file.
path_base = '/Users/harangju/Developer/data/wikinet/dump/'
name_xml = 'enwiki-20190801-pages-articles-multistream.xml.bz2'
name_index = 'enwiki-20190801-pages-articles-multistream-index.txt.bz2'

# path_base already ends with '/', so the full paths are plain concatenations.
path_xml = f"{path_base}{name_xml}"
path_index = f"{path_base}{name_index}"
# Create the wikinet dump reader from the compressed XML archive and its
# matching index file.
# NOTE(review): the "Dump: Loading index..." message only appears in the output
# of the first load_page call, which suggests the index is read lazily — confirm.
dump = wiki.Dump(path_xml, path_index)

In [9]:
# Sanity check: load the 'Philosophy' page and display only its first 100
# characters (the cell result is this slice).
dump.load_page('Philosophy')[:100]

Dump: Loading index...
Dump: Loaded.


'{{short description|Study of general and fundamental questions}}\n{{Other uses}}\n{{pp|small=yes}}\n{{U'

In [10]:
# Second sanity check on a different title: display the first 100 characters
# of the 'Messenger RNA' page.
dump.load_page('Messenger RNA')[:100]

'[[Image:MRNA-interaction.png|thumb|500px|The "life cycle" of an \'\'\'mRNA\'\'\' in a [[eukaryote|eukaryot'

In [18]:
import numpy as np

# For every topic, draw a reproducible sample of at most `num_sample` nodes and
# write to "<topic>.txt" the text surrounding the smallest year that
# wiki.Dump.filter_years finds in the page's first section plus its history
# text. Nodes whose page cannot be loaded are skipped; pages without any year
# are recorded as "none". Progress ("<index> <node>; ") is echoed to stdout.
num_sample = 100
context = 140  # characters of context kept on each side of the matched year
for topic in topics:
    print(f"{topic}:", end=' ')
    # Page text is arbitrary Unicode; pin the encoding instead of relying on
    # the platform default so the write cannot fail on non-UTF-8 locales.
    with open(f"{topic}.txt", 'w', encoding='utf-8') as f:
        # Re-seed for each topic so reruns draw the same sample of nodes.
        np.random.seed(0)
        nodes = networks[topic].nodes
        # Slicing clamps to the available length, so no explicit size check
        # is needed when there are fewer than `num_sample` nodes.
        idx = np.random.permutation(len(nodes))[:num_sample]
        for i in idx:
            print(f"{i} {nodes[i]}", end='; ')
            page = dump.load_page(nodes[i])
            if not page:
                # Nothing usable was loaded for this title; leave it out of
                # the report entirely.
                continue
            f.write(f"{nodes[i]}: ")
            history = wiki.Dump.get_history(page)
            # NOTE(review): relies on load_page leaving the parsed page in
            # dump.page — confirm against wikinet.
            top = dump.page.get_sections()[0].strip_code()
            text = top + history
            years, matches = wiki.Dump.filter_years(text, get_matches=True)
            if len(years) > 0:
                # Assumes `matches` is index-aligned with `years` — TODO confirm.
                j = np.argmin(years)
                match = matches[j]
                # Clamp at 0: a negative start would be interpreted as an
                # offset from the END of the text and yield an empty or
                # unrelated snippet for years found near the beginning.
                start = max(match.start() - context, 0)
                string = text[start:match.end() + context]
                string = string.replace('\n', ' ')
                f.write(f"\n{string}\n\n")
            else:
                f.write("none\n\n")
    print()

biochemistry: 418 Haploid; Redirect from 'Haploid' to 'Ploidy'.
202 Primary structure; Redirect from 'Primary structure' to 'Biomolecular structure'.
682 Dynorphin; 316 Nervous system; 267 Serotonin; 790 Histamine H2 receptor; 936 Phosphopeptide; 295 Kinase; 624 Intermembrane space; 981 Radioisotopic labelling; Redirect from 'Radioisotopic labelling' to 'Isotopic labeling'.
704 Erythropoietin receptor; 75 Phospholipid; 27 Structure; 315 Synapse; 750 Structural domain; Redirect from 'Structural domain' to 'Protein domain'.
31 Macromolecule; 828 Kallidin; 479 Vitamin D-dependent calcium-binding protein; 884 Neoplasm protein; 557 Chemical thermodynamics; 378 Atp synthase; Redirect from 'Atp synthase' to 'ATP synthase'.
931 Peptide initiation factor; Redirect from 'Peptide initiation factor' to 'Translation (biology)'.
279 Microbe; Redirect from 'Microbe' to 'Microorganism'.
530 Cyclin A; 613 Concanavalin A; 311 Aminobutyric acid; 486 Calcium signaling; 395 Atrial natriuretic factor recept

212 Paleobiology; 180 Evolutionarily stable strategy; 166 Evolution of sexual reproduction; 96 Sean B. Carroll; 45 Domestication; 218 The Major Transitions in Evolution; 7 Natural selection; 158 Evolution of insects; 233 Hermann Joseph Muller; 108 Climatic adaptation; 246 Preadaptation; 181 Evolutionary taxonomy; 76 Gene flow; 247 Quantum evolution; 125 Nothing in Biology Makes Sense Except in the Light of Evolution; 198 Genetic hitchhiking; 178 Sociobiology; 27 Species; 106 Most recent common ancestor; 207 Language; 37 Allopatric speciation; 176 Evolutionary history of plants; 20 Molecular evolution; 103 Chronospecies; 64 Abiogenesis; 160 Evolution of human intelligence; 139 Ecological genetics; 5 Alfred Russel Wallace; 22 Evolutionary developmental biology; 187 Fisher's reproductive value; Redirect from 'Fisher's reproductive value' to 'Reproductive value (population genetics)'.
63 History of speciation; 12 Sexual selection; 153 Gene duplication; 226 Y-chromosomal Adam; 135 Landrace;

260 Silent mutation; 253 Screening (medicine); 6 Transcription (genetics); Redirect from 'Transcription (genetics)' to 'Transcription (biology)'.
1 Molecular biology; 293 Immunoprecipitation; 278 Helix-loop-helix; Redirect from 'Helix-loop-helix' to 'Basic helix-loop-helix'.
255 Electroporation; 204 Congenital; Redirect from 'Congenital' to 'Birth defect'.
153 Sickle-cell disease; Redirect from 'Sickle-cell disease' to 'Sickle cell disease'.
190 Chromosomal translocation; 383 Suicide gene; 246 Dnase; Redirect from 'Dnase' to 'Deoxyribonuclease'.
164 Side chain; 235 Retrovirus; 107 Leukemia; 52 Restriction enzyme; 10 Sds-page; Page 'Sds-page' not in index.
26 Cytosine; 45 Gene therapy; 232 Pseudogene; 223 Exonuclease; 249 Upstream and downstream (DNA); 194 Stop codon; 345 Post-translational regulation; 289 Nucleic acid hybridization; 15 Western blot; 158 Brca2; Redirect from 'Brca2' to 'BRCA2'.
259 Evolutionary clock; Redirect from 'Evolutionary clock' to 'Molecular clock'.
5 5' flankin

309 Off-axis optical system; 142 Zoom lens; 258 Physical optics; 66 Abbe prism; 229 Optical waveguide; Redirect from 'Optical waveguide' to 'Waveguide (optics)'.
12 X-ray; 307 Polarimeter; 254 Image processing; Redirect from 'Image processing' to 'digital image processing'.
320 Ray transfer matrix analysis; 330 Pincushion distortion; Redirect from 'Pincushion distortion' to 'Distortion (optics)'.
17 Photodiode; 170 Chrominance; 124 Collimated light; Redirect from 'Collimated light' to 'Collimated beam'.

philosophy of language: 222 Universal language; 101 Symbol; 214 Radical translation; 191 Marilyn Frye; 137 Friedrich Waismann; 229 Unilalianism; 149 Implicature; 106 Proper name (philosophy); 145 Principle of compositionality; 92 James F. Conant; 138 Function and Concept; 104 Ostensive definition; 136 Frege's Puzzle; Redirect from 'Frege's Puzzle' to 'Frege's puzzles'.
157 Jacques Bouveresse; 107 Mediated reference theory; 8 Meaning (philosophy of language); 71 Definition; 213 Plato's 

219 Greedy reductionism; 266 Pessimistic induction; 206 Friedrich Wilhelm Joseph Schelling; 230 Heroic theory of invention and scientific development; 298 Philosophy of computer science; 106 Philip Kitcher; 68 Antiscience; 74 Scientific theory; 158 Game theory; 270 Max Bense; 205 Friedrich Waismann; 179 Inductive reasoning; 212 Time; 7 Philosophy of physics; 64 Internalism and externalism; 214 Geoffrey Hellman; 159 Decision theory; 263 Joseph Henry Woodger; 226 Henry Moyes; 59 Causality; 101 C. D. Broad; 134 Niels Bohr; 56 Uniformitarianism; 116 Perception; 246 Information ethics; 186 Ernst W. Mayr; Redirect from 'Ernst W. Mayr' to 'Ernst Mayr'.
164 Determinism; 156 David Stenhouse; 196 Fatalism; 63 Philosophy of mathematics; 299 Philosophy of economics; Redirect from 'Philosophy of economics' to 'Philosophy and economics'.
247 Philosophy of information; 310 Quantity; 338 Neuroethics; 142 Conservation biology; 21 Artificial intelligence; 315 Relationship between religion and science; 1

80 Bcpl; Page 'Bcpl' not in index.
51 Operating system; 111 Design pattern (computer science); Redirect from 'Design pattern (computer science)' to 'Software design pattern'.
156 Instruction set; Redirect from 'Instruction set' to 'Instruction set architecture'.
197 Systems design; 86 Declarative programming; 138 Numerical analysis; 129 Iso 9660; Page 'Iso 9660' not in index.
161 Interface description language; 116 Disassembler; 41 Antipattern; Redirect from 'Antipattern' to 'Anti-pattern'.
14 Video game; 27 Java (programming language); 
