In [None]:
# --- Locations (everything lives under path_root) ---------------------------
path_root = '/content/'
path_csv = f'{path_root}notebooks.csv'        # CSV listing the notebooks to fetch
path_ipynb = f'{path_root}ipynb/'             # downloaded .ipynb files
path_py = f'{path_root}py/'                   # .py files extracted from the notebooks
path_report = f'{path_root}report/'           # reset by the setup cell; presumably for reports (not written by the visible cells)
path_moss = f'{path_root}moss'                # where the Perl MOSS client script is written
path_moss_base = f'{path_py}original_work.py' # base file handed to MOSS via -b

# --- File extensions ---------------------------------------------------------
ext_ipynb, ext_py = '.ipynb', '.py'

# --- MOSS account ------------------------------------------------------------
# Substituted for the {{moss_user_id}} placeholder in the Perl client script.
moss_user_id = 284199714

In [None]:
# Reset the working directories so every run starts from empty download,
# conversion and report folders.
# NOTE: wiping path_py also deletes the MOSS base file (path_moss_base lives
# inside path_py), so that file has to be put back before the MOSS cell runs.
!rm -rf '{path_ipynb}' '{path_py}' '{path_report}'
!mkdir '{path_ipynb}' '{path_py}' '{path_report}'

In [None]:
!pip install unidecode

In [None]:
import os
import re
import csv
import json
import unidecode
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
def extract_drive_id(url):
    """Return the Google Drive file id embedded in ``url``, or None.

    The id is taken to be the first run of 25 or more word characters or
    hyphens found in the URL.
    """
    match = re.search(r'[-\w]{25,}', url)
    return match.group(0) if match else None


def notebook_to_source(notebook):
    """Concatenate the code cells of a parsed notebook into one source string.

    ``notebook`` is the dict obtained from ``json.load`` on an .ipynb file.
    A cell's ``source`` may be a single string or a list of lines, and the
    last line of a cell usually has no trailing newline, so every cell is
    terminated explicitly. Otherwise the last line of one cell would be
    glued to the first line of the next.
    """
    chunks = []
    for cell in notebook['cells']:
        if cell['cell_type'] != 'code':
            continue
        source = cell['source']
        text = source if isinstance(source, str) else ''.join(source)
        if text and not text.endswith('\n'):
            text += '\n'
        chunks.append(text)
    return ''.join(chunks)


# Authenticate the Colab user and build a PyDrive client from those credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# newline='' is what the csv module expects for files handed to csv.reader.
with open(path_csv, 'r', newline='', encoding='utf-8') as input_csv:
    reader = csv.reader(input_csv)
    next(reader, None)  # skip the header row (tolerates an empty file)
    for i, row in enumerate(reader):
        # File name: "<row number>_<column 2>_<column 3>", with whitespace and
        # slashes replaced by underscores, then transliterated to ASCII.
        title_unicode = re.sub(r'[\s/]', '_', f'{i+1}_{row[2]}_{row[3]}')
        title = unidecode.unidecode(title_unicode)
        url_ipynb = row[5]
        file_ipynb = path_ipynb + title + ext_ipynb
        file_py = path_py + title + ext_py

        print(f"[{i+1}]\tDownloading\t{title}{ext_ipynb} <- {url_ipynb}")

        file_id = extract_drive_id(url_ipynb)
        if file_id is None:
            # Fail with the offending row instead of an opaque IndexError.
            raise ValueError(f"[{i+1}] No Google Drive file id found in URL {url_ipynb!r}")
        drive.CreateFile({'id': file_id}).GetContentFile(file_ipynb)

        with open(file_ipynb, 'r', encoding='utf-8') as input_json:
            notebook = json.load(input_json)

        print(f"[{i+1}]\tConverting\t{title}{ext_py} <- {title}{ext_ipynb}")

        with open(file_py, 'w', encoding='utf-8') as output_py:
            output_py.write(notebook_to_source(notebook))

In [None]:
# Source text of the Perl MOSS submission client. It is kept as a raw string so
# that backslash sequences such as \n and \" reach Perl unchanged. The
# {{moss_user_id}} token is a placeholder that must be replaced with the real
# user id before the script is written to disk.
#
# What the script does: parses its command-line options, checks that every base
# file and submitted file exists, is readable and is a text file, connects to
# the MOSS server, sends the options, uploads the base files (id 0) and the
# submitted files (ids 1, 2, ...), sends the query and prints one line of the
# server's reply.
MOSS = r"""#!/usr/bin/env perl

use IO::Socket;

@languages = ("c", "cc", "java", "ml", "pascal", "ada", "lisp", "scheme", "haskell", "fortran", "ascii", "vhdl", "perl", "matlab", "python", "mips", "prolog", "spice", "vb", "csharp", "modula2", "a8086", "javascript", "plsql", "verilog");

$server = 'moss.stanford.edu';
$port = '7690';
$noreq = "Request not sent.";
$usage = "usage: moss [-x] [-l language] [-d] [-b basefile1] ... [-b basefilen] [-m #] [-c \"string\"] file1 file2 file3 ...";

$userid = {{moss_user_id}};
$opt_l = "c";
$opt_m = 10;
$opt_d = 0;
$opt_x = 0;
$opt_c = "";
$opt_n = 250;
$bindex = 0;

# Consume leading "-X[value]" options; whatever remains in @ARGV is the file list.
while(@ARGV && ($_ = $ARGV[0]) =~ /^-(.)(.*)/) {
    ($first,$rest) = ($1,$2);
    shift(@ARGV);

    if($first eq "d") {
        $opt_d = 1;
        next;
    }

    if($first eq "b") {
        if($rest eq '') {
            die "No argument for option -b.\n" unless @ARGV;
            $rest = shift(@ARGV);
        }

        $opt_b[$bindex++] = $rest;
        next;
    }

    if($first eq "l") {
        if($rest eq '') {
            die "No argument for option -l.\n" unless @ARGV;
            $rest = shift(@ARGV);
        }

        $opt_l = $rest;
        next;
    }

    if($first eq "m") {
        if($rest eq '') {
            die "No argument for option -m.\n" unless @ARGV;
            $rest = shift(@ARGV);
        }

        $opt_m = $rest;
        next;
    }

    if($first eq "c") {
        if($rest eq '') {
            die "No argument for option -c.\n" unless @ARGV;
            $rest = shift(@ARGV);
        }

        $opt_c = $rest;
        next;
    }

    if($first eq "n") {
        if($rest eq '') {
            die "No argument for option -n.\n" unless @ARGV;
            $rest = shift(@ARGV);
        }

        $opt_n = $rest;
        next;
    }

    if($first eq "x") {
        $opt_x = 1;
        next;
    }

    if($first eq "s") {
        $server = shift(@ARGV);
        next;
    }

    if($first eq "p") {
        $port = shift(@ARGV);
        next;
    }

    die "Unrecognized option -$first.  $usage\n";
}

print "Checking files . . . \n";
$i = 0;
while($i < $bindex) {
    die "Base file $opt_b[$i] does not exist. $noreq\n" unless -e "$opt_b[$i]";
    die "Base file $opt_b[$i] is not readable. $noreq\n" unless -r "$opt_b[$i]";
    die "Base file $opt_b[$i] is not a text file. $noreq\n" unless -T "$opt_b[$i]";
    $i++;
}

foreach $file (@ARGV) {
    die "File $file does not exist. $noreq\n" unless -e "$file";
    die "File $file is not readable. $noreq\n" unless -r "$file";
    die "File $file is not a text file. $noreq\n" unless -T "$file";
}

if("@ARGV" eq '') {
   die "No files submitted.\n $usage";
}

print "OK\n";

$sock = new IO::Socket::INET(PeerAddr => $server,
                             PeerPort => $port,
                             Proto => 'tcp');

die "Could not connect to server $server: $!\n" unless $sock;
$sock->autoflush(1);

# Read one line of the server's reply and echo it.
sub read_from_server {
    $msg = <$sock>;
    print $msg;
}

# Send one file: a header line "file <id> <lang> <size> <name>" followed by the contents.
sub upload_file {
    local ($file, $id, $lang) = @_;

    # First pass: measure the size announced in the header line.
    open(F, '<', $file) or die "Could not open $file: $!\n";
    $size = 0;
    while (<F>) {
        $size += length($_);
    }
    close(F);

    print "Uploading $file ...";

    # Second pass: stream the contents. The file is opened before the name is
    # rewritten, because the header carries the name with whitespace replaced.
    open(F, '<', $file) or die "Could not open $file: $!\n";
    $file =~s/\s/\_/g;
    print $sock "file $id $lang $size $file\n";
    while (<F>) {
        print $sock $_;
    }
    close(F);
    print "done.\n";
}


print $sock "moss $userid\n";
print $sock "directory $opt_d\n";
print $sock "X $opt_x\n";
print $sock "maxmatches $opt_m\n";
print $sock "show $opt_n\n";

print $sock "language $opt_l\n";
$msg = <$sock>;
chop($msg);

if($msg eq "no") {
    print $sock "end\n";
    die "Unrecognized language $opt_l.";
}

# Base files are uploaded with id 0.
$i = 0;
while($i < $bindex) {
    &upload_file($opt_b[$i++],0,$opt_l);
}

# Submitted files get ids 1, 2, 3, ...
$setid = 1;
foreach $file (@ARGV) {
    &upload_file($file,$setid++,$opt_l);
}

print $sock "query 0 $opt_c\n";
print "Query submitted. Waiting for the server's response.\n";
&read_from_server();
print $sock "end\n";
close($sock);
"""

with open(path_moss, 'w') as output_moss:
    moss = re.sub('{{moss_user_id}}', str(moss_user_id), MOSS)
    output_moss.write(moss)

!chmod ug+x moss
!./moss -l python -b '{path_moss_base}' {path_py}*.py