Browse files

add anonymization scripts, log handling document, and sample Info dir…

…ectory
  • Loading branch information...
1 parent d662274 commit 69653509a19bf7d34a5267093e0f9f46c2ed78be anders committed Jul 11, 2008
View
7 LogProcessing/Info.sample/class-3326.xml
@@ -0,0 +1,7 @@
+<class>
+ <name>SP212 General Physics II</name>
+ <school>USNA</school>
+ <period>3326</period>
+ <description>Electricity and Magnetism Spring 2007</description>
+ <instructor>Don Treacy</instructor>
+</class>
View
7 LogProcessing/Info.sample/class-5526.xml
@@ -0,0 +1,7 @@
+<class>
+ <name>SP212 General Physics II</name>
+ <school>USNA</school>
+ <period>5526</period>
+ <description>Electricity and Magnetism Spring 2007</description>
+ <instructor>Don Treacy</instructor>
+</class>
View
7 LogProcessing/Info.sample/class-6546.xml
@@ -0,0 +1,7 @@
+<class>
+ <name>SP212 General Physics II</name>
+ <school>USNA</school>
+ <period>6546</period>
+ <description>Electricity and Magnetism Spring 2007</description>
+ <instructor>Mary Wintersgill</instructor>
+</class>
View
79 LogProcessing/Info.sample/classmap.txt
@@ -0,0 +1,79 @@
+DE16B 3326
+DE14D 3326
+DE099 3326
+DB8B7 3326
+DDEEF 3326
+090966 3326
+DDC25 3326
+DD961 3326
+DD8DD 3326
+DD7F9 3326
+DD71E 3326
+DD517 3326
+DD457 3326
+DD2BF 3326
+DD2A7 3326
+DCF0B 3326
+live4nectar 3326
+DCE39 3326
+DCD49 3326
+DCD19 3326
+DCD07 3326
+m095652 3326
+DCC4D 3326
+DCC3B 3326
+DC96B 3326
+DE20A 5526
+DE18F 5526
+DDD63 5526
+DDD33 5526
+DD925 5526
+DD64F 5526
+DD60D 5526
+DD54D 5526
+DD45D 5526
+DD3D3 5526
+DD1AB 5526
+DD199 5526
+DD01F 5526
+DCF65 5526
+DCFA1 5526
+DCDA9 5526
+DCB21 5526
+DC9B9 5526
+DC995 5526
+DC8F3 5526
+DC8E7 5526
+DC791 5526
+DC6EF 5526
+DC6E9 5526
+DE24F 6546
+DE22B 6546
+DE147 6546
+DE05D 6546
+DDF07 6546
+DDF01 6546
+DDF01 6546
+DDDFF 6546
+DDD57 6546
+DDD21 6546
+DDC1F 6546
+DDA87 6546
+DD955 6546
+DD8E3 6546
+DD7E7 6546
+DD74B 6546
+DD48D 6546
+DD2DD 6546
+DD193 6546
+DD06D 6546
+DD037 6546
+DCFDD 6546
+DCECF 6546
+DCECF 6546
+DCDCD 6546
+DCBF9 6546
+DCB03 6546
+DDACF 5526
+DD469 3326
+
View
4 LogProcessing/Info.sample/condition-control.xml
@@ -0,0 +1,4 @@
+ <condition>
+ <name>Katz Control</name>
+ <type>Control</type>
+ </condition>
View
4 LogProcessing/Info.sample/condition-experiment1.xml
@@ -0,0 +1,4 @@
+ <condition>
+ <name>Katz Short KCD</name>
+ <type>Experimental</type>
+ </condition>
View
4 LogProcessing/Info.sample/condition-experiment2.xml
@@ -0,0 +1,4 @@
+ <condition>
+ <name>Katz Long KCD</name>
+ <type>Experimental</type>
+ </condition>
View
1 LogProcessing/Info.sample/dataset.txt
@@ -0,0 +1 @@
+USNA Physics Spring 2007
View
573 LogProcessing/Info.sample/unitmap.txt
@@ -0,0 +1,573 @@
+MOMR1A Angular Momentum
+MOMR1B Angular Momentum
+MOMR2A Angular Momentum
+MOMR2B Angular Momentum
+MOMR3A Angular Momentum
+MOMR4A Angular Momentum
+EQCAP1A Capacitance
+EQCAP1B Capacitance
+EQCAP1C Capacitance
+EQCAP1D Capacitance
+EQCAP2A Capacitance
+EQCAP2B Capacitance
+EQCAP3A Capacitance
+EQCAP3B Capacitance
+EQCAP4A Capacitance
+EQCAP4B Capacitance
+EQCAP5A Capacitance
+EQCAP6A Capacitance
+CAP1A Capacitance
+CAP1B Capacitance
+CAP2A Capacitance
+CAP2B Capacitance
+CAP3A Capacitance
+CAP4A Capacitance
+CAP5A Capacitance
+CAP6A Capacitance
+CAP6B Capacitance
+CAP9A Capacitance
+CAP9B Capacitance
+ROTS1A Circular Motion
+ROTS1B Circular Motion
+ROTS1C Circular Motion
+ROTS2A Circular Motion
+ROTS3A Circular Motion
+ROTS4A Circular Motion
+ROTS5A Circular Motion
+ROTS6A Circular Motion
+ROTS6B Circular Motion
+ROTS6C Circular Motion
+ROTS7A Circular Motion
+ROTS8A Circular Motion
+ROTS8B Circular Motion
+KIR1A DC Circuits
+KIR1B DC Circuits
+KIR2A DC Circuits
+KIR3A DC Circuits
+KIR3B DC Circuits
+KIR3C DC Circuits
+KIR4A DC Circuits
+KIR5A DC Circuits
+KIR7A DC Circuits
+EPOW1 DC Circuits
+EPOW2 DC Circuits
+EPOW3 DC Circuits
+EPOW4 DC Circuits
+RC1A DC Circuits
+RC1B DC Circuits
+RC1C DC Circuits
+RC2A DC Circuits
+RC3A DC Circuits
+RC3B DC Circuits
+RC4A DC Circuits
+RC4B DC Circuits
+RC5A DC Circuits
+RC6A DC Circuits
+RC7A DC Circuits
+RC7B DC Circuits
+RC8 DC Circuits
+RC9 DC Circuits
+CHARGE1A Electric Field
+CHARGE1B Electric Field
+CHARGE2 Electric Field
+COUL1A Electric Field
+COUL1B Electric Field
+COUL1C Electric Field
+COUL2A Electric Field
+COUL2B Electric Field
+COUL2C Electric Field
+COUL3 Electric Field
+EFIELD1A Electric Field
+EFIELD1B Electric Field
+EFIELD1C Electric Field
+EFIELD1D Electric Field
+EFIELD1E Electric Field
+EFIELD2 Electric Field
+EFIELD3 Electric Field
+EFIELD4A Electric Field
+EFIELD4B Electric Field
+FOR1A Electric Field
+FOR1B Electric Field
+FOR1C Electric Field
+FOR2A Electric Field
+FOR2B Electric Field
+FOR4A Electric Field
+FOR4B Electric Field
+FOR5 Electric Field
+FOR7A Electric Field
+FOR7B Electric Field
+FOR8A Electric Field
+FOR8B Electric Field
+FOR9A Electric Field
+FOR9B Electric Field
+FOR10A Electric Field
+FOR10B Electric Field
+FOR11A Electric Field
+FOR11B Electric Field
+FOR11C Electric Field
+ELEC1A Electric Field
+ELEC1B Electric Field
+ELEC2 Electric Field
+ELEC3B Electric Field
+ELEC4B Electric Field
+ELEC5B Electric Field
+ELEC6B Electric Field
+GAUSS1 Electric Field
+GAUSS3 Electric Field
+GAUSS4 Electric Field
+GAUSS5 Electric Field
+GAUSS6 Electric Field
+GAUSS8 Electric Field
+GAUSS9 Electric Field
+GAUSS10 Electric Field
+GAUSS11 Electric Field
+DIP1A Electric Field
+DIP1B Electric Field
+EPOT1A Electric Potential
+EPOT1B Electric Potential
+EPOT1C Electric Potential
+EPOT2 Electric Potential
+POT1A Electric Potential
+POT1B Electric Potential
+POT2A Electric Potential
+POT2B Electric Potential
+POT2C Electric Potential
+POT3A Electric Potential
+POT3B Electric Potential
+POT4 Electric Potential
+POT5 Electric Potential
+POT6 Electric Potential
+POT7 Electric Potential
+POT8 Electric Potential
+FARA1A Electromagnetic Induction
+FARA1B Electromagnetic Induction
+FARA2A Electromagnetic Induction
+FARA2B Electromagnetic Induction
+FARA3A Electromagnetic Induction
+FARA3B Electromagnetic Induction
+FARA4A Electromagnetic Induction
+FARA4B Electromagnetic Induction
+FARA5A Electromagnetic Induction
+FARA5B Electromagnetic Induction
+FARA5C Electromagnetic Induction
+FARA6A Electromagnetic Induction
+FARA6B Electromagnetic Induction
+FARA7A Electromagnetic Induction
+FARA7B Electromagnetic Induction
+FARA7C Electromagnetic Induction
+FARA7D Electromagnetic Induction
+FARA8A Electromagnetic Induction
+FARA8B Electromagnetic Induction
+FARA8C Electromagnetic Induction
+FARA9 Electromagnetic Induction
+FARA10A Electromagnetic Induction
+FARA10B Electromagnetic Induction
+FARA11A Electromagnetic Induction
+FARA11B Electromagnetic Induction
+AMP1 Electromagnetic Induction
+EMWAVE1 Electromagnetic Waves
+WAVE19 Electromagnetic Waves
+EMWAVE3A Electromagnetic Waves
+EMWAVE4 Electromagnetic Waves
+EMWAVE5 Electromagnetic Waves
+E1A Energy-Work
+E1B Energy-Work
+E1C Energy-Work
+E2A Energy-Work
+E2B Energy-Work
+E2C Energy-Work
+E3A Energy-Work
+E4A Energy-Work
+E4B Energy-Work
+E4C Energy-Work
+E5A Energy-Work
+E5B Energy-Work
+E6A Energy-Work
+E7A Energy-Work
+E7B Energy-Work
+E8A Energy-Work
+E8B Energy-Work
+E9A Energy-Work
+E9B Energy-Work
+E10A Energy-Work
+E11A Energy-Work
+WE1A Energy-Work
+WE2A Energy-Work
+WE3A Energy-Work
+WE4A Energy-Work
+WE5 Energy-Work
+WE6 Energy-Work
+WE8 Energy-Work
+WE9 Energy-Work
+EGRAV1 Energy-Work
+FLUIDS1 Fluids
+FLUIDS2 Fluids
+FLUIDS3 Fluids
+FLUIDS4 Fluids
+FLUIDS5 Fluids
+FLUIDS6 Fluids
+FLUIDS7 Fluids
+FLUIDS8 Fluids
+FLUIDS9 Fluids
+FLUIDS11 Fluids
+FLUIDS12 Fluids
+FLUIDS13 Fluids
+FLUIDS14 Fluids
+FLUIDS15 Fluids
+FBD1A Free Body Diagrams
+FBD1B Free Body Diagrams
+FBD2A Free Body Diagrams
+FBD3A Free Body Diagrams
+FBD4A Free Body Diagrams
+FBD5A Free Body Diagrams
+FBD6A Free Body Diagrams
+FBD8 Free Body Diagrams
+FBD9 Free Body Diagrams
+IND1A Inductance
+IND1B Inductance
+IND1C Inductance
+IND2A Inductance
+IND3A Inductance
+IND3B Inductance
+IND3C Inductance
+IND4 Inductance
+LR1A Inductance
+LR1B Inductance
+LR1C Inductance
+LR1D Inductance
+LR2A Inductance
+LR2B Inductance
+LR3A Inductance
+LR3B Inductance
+LC1A Inductance
+LC2A Inductance
+LC2B Inductance
+LRC1A Inductance
+LRC2A Inductance
+LMOM1A Linear Momentum
+LMOM1B Linear Momentum
+LMOM2A Linear Momentum
+LMOM2B Linear Momentum
+LMOM3A Linear Momentum
+LMOM4A Linear Momentum
+LMOM5 Linear Momentum
+LMOM6 Linear Momentum
+LMOM7 Linear Momentum
+PGRAPH1 Linear Momentum
+PGRAPH2 Linear Momentum
+PGRAPH3 Linear Momentum
+IMP1 Linear Momentum
+IMP2 Linear Momentum
+IMP3A Linear Momentum
+IMP3B Linear Momentum
+IMP3C Linear Momentum
+CM1 Linear Momentum
+CM2 Linear Momentum
+CM3 Linear Momentum
+ROC1 Linear Momentum
+ROC2 Linear Momentum
+ROC3 Linear Momentum
+ROC4 Linear Momentum
+ROC5 Linear Momentum
+ROC6 Linear Momentum
+MAG1A Magnetic Field
+MAG1B Magnetic Field
+MAG1C Magnetic Field
+MAG2A Magnetic Field
+MAG2B Magnetic Field
+MAG3A Magnetic Field
+MAG3B Magnetic Field
+MAG4A Magnetic Field
+MAG5A Magnetic Field
+MAG5B Magnetic Field
+MAGTOR1A Magnetic Field
+MAGTOR1B Magnetic Field
+MAGTOR1C Magnetic Field
+MAGTOR1D Magnetic Field
+MAGDIP1 Magnetic Field
+MAGDIP2 Magnetic Field
+MAGDIP3 Magnetic Field
+MAGDIP4 Magnetic Field
+MAG6A Magnetic Field
+MAG6B Magnetic Field
+MAG6C Magnetic Field
+MAG7 Magnetic Field
+MAG8A Magnetic Field
+MAG8B Magnetic Field
+MAG9 Magnetic Field
+MAG10 Magnetic Field
+MAG11 Magnetic Field
+MAG12 Magnetic Field
+MIRROR1 Optics
+MIRROR2 Optics
+MIRROR3 Optics
+MIRROR4 Optics
+LENS1A Optics
+LENS1B Optics
+LENS2A Optics
+LENS2B Optics
+LENS3A Optics
+LENS3B Optics
+LENS4A Optics
+LENS4B Optics
+LENS5A Optics
+LENS5B Optics
+REF1 Optics
+REF2A Optics
+REF2B Optics
+REF2C Optics
+REF3A Optics
+REF3B Optics
+REF4A Optics
+REF4B Optics
+REF5A Optics
+REF5B Optics
+REF6 Optics
+INT1A Optics
+INT1B Optics
+INT1C Optics
+INT1D Optics
+INT2A Optics
+INT2B Optics
+OSC1 Oscillations
+OSC2 Oscillations
+OSC3 Oscillations
+OSC4 Oscillations
+OSC5 Oscillations
+OSC6 Oscillations
+OSC7 Oscillations
+OSC8 Oscillations
+POW1A Power
+POW1B Power
+POW2A Power
+POW3A Power
+POW4A Power
+POW4B Power
+POW5A Power
+POW5B Power
+POW5C Power
+POW5D Power
+POW6A Power
+EQRES1A Resistance
+EQRES1B Resistance
+EQRES1C Resistance
+EQRES1D Resistance
+EQRES1E Resistance
+EQRES2A Resistance
+EQRES2B Resistance
+EQRES3A Resistance
+EQRES3B Resistance
+EQRES4A Resistance
+EQRES4B Resistance
+EQRES5A Resistance
+EQRES6A Resistance
+EQRES7A Resistance
+EQRES7B Resistance
+EQRES8A Resistance
+EQRES8B Resistance
+DR1A Rotational Dynamics
+DR2A Rotational Dynamics
+DR2B Rotational Dynamics
+DR3A Rotational Dynamics
+DR4A Rotational Dynamics
+DR5A Rotational Dynamics
+DR6A Rotational Dynamics
+DR6B Rotational Dynamics
+DR7A Rotational Dynamics
+DR8A Rotational Dynamics
+EROT2 Rotational Dynamics
+EROT3 Rotational Dynamics
+EROT4 Rotational Dynamics
+GRAV1 Rotational Dynamics
+GRAV2 Rotational Dynamics
+GRAV3 Rotational Dynamics
+GRAV4 Rotational Dynamics
+GRAV5 Rotational Dynamics
+KR8 Rotational Kinematics
+KR9 Rotational Kinematics
+KR1A Rotational Kinematics
+KR1B Rotational Kinematics
+KR1C Rotational Kinematics
+KR2A Rotational Kinematics
+KR2B Rotational Kinematics
+KR3A Rotational Kinematics
+KR3B Rotational Kinematics
+KR3C Rotational Kinematics
+KR4A Rotational Kinematics
+KR4B Rotational Kinematics
+KR5A Rotational Kinematics
+KR6A Rotational Kinematics
+KR7A Rotational Kinematics
+S1A Statics
+S1B Statics
+S1C Statics
+S1D Statics
+S1E Statics
+S1F Statics
+S2A Statics
+S2B Statics
+S2C Statics
+S2D Statics
+S2E Statics
+S3A Statics
+S3B Statics
+S3C Statics
+S4A Statics
+S4B Statics
+S5A Statics
+S6A Statics
+S7A Statics
+S7B Statics
+S8A Statics
+S9A Statics
+S10A Statics
+S11A Statics
+S11B Statics
+S12A Statics
+S13 Statics
+S14 Statics
+S15 Statics
+S16 Statics
+S17 Statics
+DQ1 Translational Dynamics
+DT1A Translational Dynamics
+DT1B Translational Dynamics
+DT1C Translational Dynamics
+DT2A Translational Dynamics
+DT3A Translational Dynamics
+DT3B Translational Dynamics
+DT3C Translational Dynamics
+DT4A Translational Dynamics
+DT4B Translational Dynamics
+DT5A Translational Dynamics
+DT6A Translational Dynamics
+DT6B Translational Dynamics
+DT6C Translational Dynamics
+DT7A Translational Dynamics
+DT7B Translational Dynamics
+DT8A Translational Dynamics
+DT9A Translational Dynamics
+DT10A Translational Dynamics
+DT11A Translational Dynamics
+DT11B Translational Dynamics
+DT12A Translational Dynamics
+DT13A Translational Dynamics
+DT13B Translational Dynamics
+DT14A Translational Dynamics
+DT14B Translational Dynamics
+DT16 Translational Dynamics
+DT17 Translational Dynamics
+DT18 Translational Dynamics
+DT19 Translational Dynamics
+KT1A Translational Kinematics
+KT1B Translational Kinematics
+KT2A Translational Kinematics
+KT2B Translational Kinematics
+KT3A Translational Kinematics
+KT3B Translational Kinematics
+KT4A Translational Kinematics
+KT4B Translational Kinematics
+KT5A Translational Kinematics
+KT6A Translational Kinematics
+KT6B Translational Kinematics
+KT7A Translational Kinematics
+KT7B Translational Kinematics
+KT8A Translational Kinematics
+KT8B Translational Kinematics
+KT9A Translational Kinematics
+KT9B Translational Kinematics
+KT10A Translational Kinematics
+KT10C Translational Kinematics
+KT11A Translational Kinematics
+KT11B Translational Kinematics
+KT12A Translational Kinematics
+KT12B Translational Kinematics
+KT12C Translational Kinematics
+KT13A Translational Kinematics
+KT13B Translational Kinematics
+KT13C Translational Kinematics
+KT14A Translational Kinematics
+KT14B Translational Kinematics
+KGRAPH1 Translational Kinematics
+KGRAPH2 Translational Kinematics
+KGRAPH3 Translational Kinematics
+KGRAPH4 Translational Kinematics
+KGRAPH5 Translational Kinematics
+KGRAPH6 Translational Kinematics
+KGRAPH7 Translational Kinematics
+KGRAPH8 Translational Kinematics
+KGRAPH9 Translational Kinematics
+KGRAPH10 Translational Kinematics
+KGRAPH11 Translational Kinematics
+KGRAPH12 Translational Kinematics
+KGRAPH13 Translational Kinematics
+KGRAPH14 Translational Kinematics
+KGRAPH16 Translational Kinematics
+KGRAPH17 Translational Kinematics
+KGRAPH18 Translational Kinematics
+KGRAPH19 Translational Kinematics
+KGRAPH20 Translational Kinematics
+KGRAPH21 Translational Kinematics
+VEC1A Vectors
+VEC1B Vectors
+VEC1C Vectors
+VEC1D Vectors
+VEC2A Vectors
+VEC2B Vectors
+VEC2C Vectors
+VEC2D Vectors
+VEC3A Vectors
+VEC3B Vectors
+VEC3C Vectors
+VEC4A Vectors
+VEC4B Vectors
+VEC4C Vectors
+VEC4D Vectors
+VEC5A Vectors
+VEC5B Vectors
+VEC5C Vectors
+VEC5D Vectors
+VEC6A Vectors
+VEC6B Vectors
+VEC6C Vectors
+VEC6D Vectors
+VEC7A Vectors
+VEC8A Vectors
+VEC8B Vectors
+VEC8C Vectors
+VEC9 Vectors
+RELVEL1A Vectors
+RELVEL2A Vectors
+RELVEL3A Vectors
+MOT1 Vectors
+MOT2 Vectors
+MOT3 Vectors
+MOT4 Vectors
+WAVE1 Waves
+WAVE2 Waves
+WAVE3 Waves
+WAVE4 Waves
+WAVE5 Waves
+WAVE6 Waves
+WAVE8 Waves
+WAVE9 Waves
+WAVE10 Waves
+WAVE11 Waves
+WAVE12 Waves
+WAVE13 Waves
+WAVE14 Waves
+WAVE15 Waves
+WAVE16 Waves
+WAVE17 Waves
+WAVE18 Waves
+WAVE24 Waves
+FOR4A Electric Field
+FOR4B Electric Field
+ELEC1A Electric Field
+FOR4C Electric Field
+ELEC2 Electric Field
+ELEC3A Electric Field
+ELEC4A Electric Field
+ELEC5A Electric Field
+ELEC6A Electric Field
+ELEC7A Electric Field
View
357 LogProcessing/OLI Log Processing.html
@@ -0,0 +1,357 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+ <meta content="text/html; charset=ISO-8859-1"
+ http-equiv="content-type">
+ <title>ANDES Log Processing</title>
+</head>
+<body>
+<h1>Processing ANDES Logs from OLI</h1>
+<br>
+This document explains how to obtain Andes log sets from OLI, anonymize
+them, and convert them into datashop format. All scripts for dealing
+with log files can be found in the LogProcessing directory of the Andes
+tree.<br>
+<br>
+<h2>Obtaining Logs from OLI</h2>
+OLI logs can be retrieved from the oli QA server,
+oli-qa.andrew.cmu.edu, using OLI's <span style="font-style: italic;">Data
+Extraction tool.</span> OLI will have to give you an account and
+configure your access to the data extraction tool, after which a link
+for it will show up as a link on the top right of your start page when
+you log in to OLI.<br>
+<br>
+Logs on the QA server are mirrors of those on the OLI production
+server. These mirrors are made periodically, typically when QA software
+is updated and at other irregular intervals. So, they typically lag the
+data on production by a few weeks. It is not allowed to retrieve logs
+from the production server since the process can overload the server. <br>
+<br>
+For courses hosted on the PSLC server, the Data Extraction tool may be
+made available on the live server at any time.<br>
+<br>
+<h2>Running the Data Extraction tool</h2>
+After clicking the Data Extraction tool, you are walked through a
+series of screens to define a query. You should make the following
+selections:<br>
+<br>
+&nbsp;&nbsp; Step 1: Select scope of data<br>
+&nbsp;&nbsp;&nbsp;&nbsp; Go down to "Course Sections" and select the
+course sections you want. The available ones for you should be
+highlighted. OLI may have to configure your access to particular course
+logs.
+Otherwise you will see them but not be able to select them in the Data
+Extraction tool interface. <br>
+&nbsp;&nbsp;&nbsp; Do not select anything else on this page. You do not
+normally want to select by content package, for example.<br>
+&nbsp;&nbsp;&nbsp; Click Go to Step 2<br>
+<br>
+&nbsp;&nbsp;&nbsp; Step 2: Options<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; <span
+ style="font-weight: bold;">Columns</span>: This selects which columns
+from the OLI log database will be included in the output in a
+comma-separated format. The essential one is the "info" column. The
+info column records application-specific information. In our
+case,&nbsp; there a single row in the OLI log database corresponding to
+the event of Andes uploading a log file at the end of an Andes problem
+session. This row includes the entire text of an Andes session logfile
+in the "info" field, so info should always remain checked. <br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; One should also check the "Action
+TimeStamp"&nbsp; to receive the server's time at the time the log was
+made. This is useful to have because the date and time shown<span
+ style="font-style: italic;"> inside</span> the Andes logs is derived
+from the user's system clock, which may be incorrectly set. So the OLI
+recorded event time is more reliable to have. This time should be very
+close to the time of the <span style="font-style: italic;">end </span>of
+the Andes session, when the log file was uploaded. <br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp; One may check the Action
+Time Zone for a more complete specification about the time.
+However,&nbsp; since this event is recorded by the OLI server, this
+does not really add information -- the times here should always be in
+the Eastern time zone where the OLI servers are, even if the students
+are working in different time zones. <br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; All other columns should
+be unchecked to keep the output simple.<br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Notes: In theory
+there might be some use for the User Id column, which will show an
+anonymized OLI user id (into a long ugly GUID), or the Session GUID,
+which identifies an OLI login session. This detail could be used to
+correlate Andes logs with other OLI, non-ANDES log entries made by the
+same student or within the same OLI login session. For example, one can
+also use the Data Extraction tool to retrieve logs showing access to
+learning pages in the course. With that result, one could determine
+which learning pages the student visited in the same session as the
+Andes log was made.&nbsp; However, we have never made any use of this
+information up to now. <br>
+&nbsp;&nbsp;&nbsp; <br>
+&nbsp;&nbsp;&nbsp; The question has sometimes come up as to whether we
+can determine when students view training videos on OLI from their
+logs. Right now, there is no record made in the OLI log database of
+these events, because they are just served as web content that does
+not pass through the OLI courseware system. One way around this would be
+to wrap each video in its own learning page. In this case there <span
+ style="font-style: italic;">would </span>be a log of those learning
+page accesses. However, that would not show whether the student had
+launched the video or not, merely whether they had opened the page on
+which it resides. Another possible way might be to convert the videos
+to Flash format, and make use of Flash media logging support built into
+certain OLI tools. This would log media viewing events in the
+DataShop's xml notation, and these events could then be retrieved <br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; <span
+ style="font-weight: bold;">Actions</span>: Select Andes. This is a
+custom action which indicates uploading an Andes log at the end of an
+Andes problem session.<br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; <span
+ style="font-weight: bold;">Filters</span>: leave blank<br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; <span
+ style="font-weight: bold;">Options</span>:&nbsp; Normally one should
+select a date range which includes all logs in the course but exclude
+other semesters that might contain the same student. The dates do
+not&nbsp; have to be exact. For example, for a Spring term 2007
+course, it could suffice to select a range from 2007-01-01 to
+2007-06-01, even if the exact course date range was somewhat narrower. <br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; The reason for this is
+that OLI does not actually record course information in the log
+database. Rather, when you request logs for a course, the Data
+Extraction server looks up the course roster and translates this into a
+query for logs by all students on that roster. If a student was in both
+a fall term and spring term course, this will retrieve logs from
+outside of the course.&nbsp; <br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; All other options should
+be left as defaults.<br>
+<br>
+At Step 3, the query is submitted and you should start downloading a
+zip file named data.zip. However, this step often times out or fails if
+the server is heavily
+loaded. In that case you just have to try again at a different time.<br>
+<br>
+&nbsp;&nbsp;&nbsp; The data.zip itself contains a single file named
+actions.csv, with all the data. Save this zip file with some
+identifying name like Fall2007-USNA.dat.z <br>
+<h4>Concatenated log format</h4>
+&nbsp;&nbsp;&nbsp; The embedded file you get is formally a csv file
+showing selected rows in a database, like a
+spreadsheet. The first line gives the column headings and subsequent
+lines give comma-separated values. As noted above, the
+last "column" of each line is the full Andes log, which normally
+includes multiple text lines within it.&nbsp; Effectively that makes
+this file a concatenated sequence of Andes logs, with a
+little bit of cruft before the initial log header line. <br>
+<br>
+&nbsp;&nbsp;&nbsp; For most purposes the concatenated file can be
+processed as a unit. By using zcat and piping output to further
+processing, it may not even be necessary to uncompress it. However, it
+could also be split into separate log files for processing
+-- see the splitlogs.pl tool in the Andes LogProcessing directory. <br>
+<br>
+&nbsp;&nbsp;&nbsp; Within the concatenated log, individual session logs
+can be found by searching for the Andes log header line, which will
+have the following format:<br>
+<br>
+2008/04/03 18:15:21,Eastern,# Log of Andes session begun Thursday,
+April 03, 2008 18:14:50 by [user] on [computer]<br>
+<br>
+&nbsp;&nbsp;&nbsp; Individual logs should each end with an END-LOG line<br>
+1:00&nbsp;&nbsp;&nbsp; END-LOG <br>
+&nbsp;&nbsp;&nbsp; However, in theory an error could prevent the
+END-LOG statement from getting into the log, so it is safest just to
+end a log at the header of the next one.<br>
+<br>
+&nbsp;&nbsp;&nbsp;&nbsp; Technically this file is in Unix text file
+format, in which there is a single newline character at the end of
+each row from the database. However, the last column of all the rows
+after the first, header line, end with the full text of an Andes log
+file, and an Andes log file is itself a multi-line Windows-format&nbsp;
+text file in which the lines end with carriage-return linefeed pairs.
+One effect of this is that when viewed in a text editor, there will
+appear to be a blank line after each log file. <br>
+<br>
+&nbsp;&nbsp; <br>
+<h2><span style="font-weight: bold;">Anonymizing the logs</span></h2>
+<h2><span style="font-weight: bold;"></span></h2>
+<br>
+There are different scripts for anonymizing USNA logs, where our
+students are supposed to use their alphas, aka mid numbers, as user
+ids, and anonymizing arbitrary student logs, where the ids may take any
+form.<br>
+<h5>Anonymizing USNA midshipmen logs: mkanon-usna.pl<br>
+</h5>
+To anonymize mid logs, obtain the file mkanon-usna.pl.template. Edit it
+to include a different hash function in the munge_id routine to map the
+mid number into some anonymization code. Rename it to mkanon-usna.pl.
+The basic way to anonymize is then:<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; zcat Fall2007-USNA.dat.z |
+perl mkanon-usna.pl&nbsp; &gt; Fall2007-USNA-anon.dat 2&gt; idmap.txt<br>
+<br>
+The standard output will be the anonymized concatenated log
+(uncompressed). The stderr output will contain any error messages, of
+which the most important is reports of ids that do not conform to the
+mid number pattern. This will be followed by a tab-delimited listing of
+the id mapping file generated. This mapping should be saved for reference
+and for use in applying the mapping to other files. So you should
+inspect idmap first, and edit out any error message before saving it.
+[Maybe better to just write idmap.txt file separately from error
+messages? ]<br>
+<br>
+A nuisance is that sometimes the same real student will have created
+two or more different OLI ids. Also, a student may not have used a mid
+number at all as an id. Normally we ask instructors to grant us TA
+access to courses we
+support so we can inspect the gradebook and rosters, so the usual way
+to identify such students is to inspect the OLI section rosters, which
+will show students with the same real name on different lines. <br>
+<br>
+Both these issues can be handled by creating a file named merge-ids.txt
+in the directory in which the script is run. This should be
+tab-separated two column listing mapping login ids to canonical&nbsp;
+user ids: instances of the first id will be mapped to the second before
+anonymizing. The second id need never have been actually used, e.g. for
+someone who used a non-mid number id, you could just map it to a mid
+number. <br>
+<br>
+Note the anonymization will already merge students who used two user
+ids consisting of a mid number with and without an initial "m" since it
+anonymizes based on the mid number itself. So no special entry is
+required for these.<br>
+<h6>Applying the id map to other files: mapid-usna.pl<br>
+</h6>
+The mapping defined in the saved file idmap.txt can be applied to other
+text files, e.g. a list of questionnaire respondents or students in some
+experimental condition, by using the script mapid-usna.pl.<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; mapid-usna.pl &lt; infile &gt; outfile<br>
+This script looks for a file named idmap.txt in the current directory.
+This also should be customized from the template file to include the
+same mapping function used in mkanon-usna. This will apply the map to
+mid numbers in the input file. If an id is not found in the map, it will use
+the hashing algorithm to generate one, generating an error message. The
+advantage of&nbsp; re-using the log-generated mapping file is that it
+tells us when it encounters a user id for which no logs were found in
+the set. This may indicate an anomaly requiring investigation, e.g. a
+questionnaire respondent who entered his user id incorrectly.<br>
+<h5>Anonymizing arbitrary logs</h5>
+Student ids in non-USNA datasets can be anonymized with the mkanon.pl
+and mapid.pl scripts. These don't use a hashing algorithm. Rather, they
+simply generate ids by adding numbers to a specified prefix. To use
+these, create a&nbsp; tab-separated mapping file (of any name)
+initialized to contain a special mapping for PREFIX, e.g<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; PREFIX\tWH081<br>
+Then do<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; perl mkanon.pl
+mapfile.txt &lt; log &gt; newlog<br>
+where mapfile.txt is the mapping file. This will update mapfile.txt
+with the new mapping file. <br>
+<br>
+This can be done multiple times; in each case, an existing mapping will
+be used if found, and the mapping file will be updated with any new
+entries at the end.<br>
+<br>
+As above, the mapping can be applied to arbitrary text by<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; perl mapid.pl mapfile.txt &lt; old &gt;
+new<br>
+<br>
+<h2>Converting Raw Logs to DataShop Format</h2>
+The script log2xml.pl can be used to convert an anonymized dataset into
+files in the xml format the DataShop uses for import into their
+database. By the datashop's request, this will generate one folder per
+student, and one xml log file per session log. At the end of the
+conversion, this should be zipped up and delivered to the DataShop for
+dropoff. The DataShop should give you ssh access to a dropbox location
+on their "cooker" server for this purpose.<br>
+<h5>Info files</h5>
+Although the basic conversion is simple, the converter makes use of
+several external files to patch in information not included in the
+Andes raw logs. It will look for them in a subdirectory named "Info"
+within its working directory when run. <br>
+<br>
+Required:<br>
+&nbsp;&nbsp; dataset.txt -- contains dataset name <br>
+Optional:<br>
+&nbsp;&nbsp; classmap.txt -- student to class id mapping. <br>
+&nbsp;&nbsp; class-XXX.xml -- class XML element for class w/id XXX<br>
+&nbsp;&nbsp; conditionmap.txt -- student to condition id mapping<br>
+&nbsp;&nbsp; condition-XXX.xml -- condition element for id XXX<br>
+&nbsp;&nbsp; unitmap.txt&nbsp; -- problem to unit mapping<br>
+<br>
+The dataset.txt file must exist. All others are optional; if not found,
+this info will not be included in the conversion.<br>
+<br>
+Samples of these files can be viewed in the directory Info.sample in
+the LogProcessing directory. <br>
+<h6>Class information</h6>
+The classmap file is a two-column tab-separated mapping of student
+names to some string identifying the class they were in. This could be
+a section number like "3346" or it could just be an instructor name
+like "gershmann". The class-XXX.xml then gives the xml element to patch
+into the conversion for specifying the student's class.&nbsp; For
+example:<br>
+<br>
+&lt;class&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+&lt;name&gt;SP212 General Physics II&lt;/name&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+&lt;school&gt;USNA&lt;/school&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+&lt;period&gt;6546&lt;/period&gt;<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &lt;description&gt;Electricity and
+Magnetism Spring 2007&lt;/description&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+&lt;instructor&gt;Mary Wintersgill&lt;/instructor&gt;<br>
+&lt;/class&gt;<br>
+<br>
+If all this information is not known, then it need not be included. <br>
+<br>
+This information must be obtained from OLI rosters or other sources
+from the instructor or experimenter. One way is to build a spreadsheet
+or list, cut the column or real names and save to a file, anonymize
+that file, and paste it back into the spreadsheet, then save in
+tab-delimited format.<br>
+<h6>Condition information</h6>
+Experimental condition information may come from two sources: it may be
+included in the logs, if OLI had been customized for different
+conditions, as in Sandy Katz's USNA experiments. In this case the
+condition will always be one of "control", "experiment", "experiment1"
+or "experiment2", which are the only possible values defined in our OLI
+course. In this case no conditionmap file is needed.<br>
+<br>
+Alternatively, the condition information may be patched in via a
+conditionmap.txt file, which maps student ids to experiment condition
+ids. This is more common for logs from lab studies. <br>
+<br>
+Again, the ids used here are arbitrary. The xml element
+condition-XXX.xml will be patched into the converted log to explain the
+student's experiment condition. A sample is<br>
+<br>
+&nbsp;&nbsp;&nbsp; &lt;condition&gt;<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &lt;name&gt;Katz Reflective
+Dialogue&lt;/name&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
+&lt;type&gt;Experimental&lt;/type&gt;<br>
+&nbsp;&nbsp;&nbsp; &lt;/condition&gt;<br>
+<br>
+Note a conditionmap will take priority over conditions found in the
+logs. It is not required to include any condition information.<br>
+<br>
+I believe the datashop format does not support multiple conditions in a
+log, even though a USNA student might have participated in multiple
+experiments, say a lab study and also a longer study like Sandy Katz's.
+<br>
+<h6>Unit information</h6>
+The converter also makes use of a mapping from problem id to problem
+set, e.g kt1 -&gt; Translational Kinematics, since this information is
+not in the logs. This is contained in the file unitmap.txt. This file
+can be generated from a set of aps files by the script&nbsp;
+mkunitmap.pl as<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; mkunitmap.pl *.aps &gt; unitmap.txt<br>
+<br>
+&nbsp;It changes rarely so it can usually just be copied. <br>
+<br>
+<br>
+<br>
+</body>
+</html>
View
25 LogProcessing/log2xml.pl
@@ -1,3 +1,4 @@
+#!/usr/bin/perl
#####################################################################
# log2xml -- convert ANDES log files into PSLC Datashop XML format
#
@@ -10,7 +11,8 @@
# output file is found in:
# student-id/session-id.xml
#
-# Uses the following files from its working directory:
+# Looks for the following supporting files in the Info subdirectory of
+# its current working directory:
# Required:
# dataset.txt -- contains dataset name
# Optional:
@@ -27,10 +29,14 @@
# provide only the single file class.xml
#
# Condition ids may come from set-condition statement in log
-# or from condition-map. There may be no conditions to set.
+# or from condition-map, with condition-map taking precedence.
+# There may be no conditions to set.
+#
+# The unitmap file needs to change only if new problems are added
+# It can be generated by the mkunitmap.pl script.
#
######################################################################
-my $revision_string = '$Revision: 1.10 $';
+my $revision_string = '$Revision: 1.11 $';
# globals for current log line
my ($timestamp, $event, $argstr);
@@ -114,7 +120,7 @@
# load the problem to unit mapping table, if it exists
if (open UNITMAP, "<Info/unitmap.txt") {
- %unitmap = map /(.*)\t(.*)\r/, <UNITMAP>;
+ %unitmap = map /(.*)\t([^\r]*)/, <UNITMAP>;
close UNITMAP;
$have_unitmap = 1;
#print STDERR "loaded unit map\n";
@@ -124,15 +130,15 @@
# we may get a condition from a conditionmap file, or else from set-condition
# above. If both are set, condition map will override.
if (open CONDITIONMAP, "<Info/conditionmap.txt") {
- %conditionmap = map /(.*)\t(.*)\r/, <CONDITIONMAP>;
+ %conditionmap = map /(.*)\t([^\r]*)/, <CONDITIONMAP>;
close CONDITIONS;
$have_conditionmap = 1;
#print STDERR "loaded condition map\n";
}
# load the student to class mapping table, if it exists
if (open CLASSMAP, "<Info/classmap.txt") {
- %classmap = map /(.*)\t(.*)\r/, <CLASSMAP>;
+ %classmap = map /(.*)\t([^\r]*)/, <CLASSMAP>;
close CLASSMAP;
$have_classmap = 1;
}
@@ -142,7 +148,7 @@
#--------------------------------------------------------------------------------
while (<>) {
chomp($_);
- s/\r$//; # delete dangling CR's remaining from Unix to DOS conversion
+ s/\r$//; # cygwin PERL includes DOS-mode CR's by default, see PERLIO
# Log header line begins a new log. We could reset and get date from this, but
# in fact we get it from initial set-session-id call below. However, need to
@@ -282,6 +288,11 @@
$unit_level_begin = "<level type=\'module\'><name>$unit</name>";
$unit_level_end = "</level>";
} else { $unit_level_begin = $unit_level_end = "" };
+ # AW: group level is only defined to make it easy to select, say,
+ # all problems named CM*, in the datashop by selecting that group.
+ # But datashop might let you do this anyway, even without the group level,
+ # just by using a pattern match on the problem name. If so, then there
+ # is not much point in including this.
# try to include a group level using name prefix, if we find one
# e.g. cm1a => CM*, roc2a => ROC*, etc.
$group = $problem; # default if name doesn't include a number
View
45 LogProcessing/mapid-usna.pl.template
@@ -0,0 +1,45 @@
#!/usr/bin/perl
#
# mapid -- map USNA mid ids in input file using external tab-delimited mapping
#          file
#
# changes occurrences of an apparent mid id, once per line.
# An apparent mid id is any six digit number optionally preceded by an "m" or "M".

# load mapping from "idmap.txt" in current working directory
open MAPFILE, "<idmap.txt" or die "open: $!";
# allow, but don't require, a DOS-mode CR before end of line
%idmap = map /(.*)\t([^\r]*)/, <MAPFILE>;
close MAPFILE;

while (<>)
{
    s/\r$//;	# cygwin perl includes CR from DOS-mode text files by default

    # Match any six digit number, optionally preceded by an "m".
    # Note numeric ids coming out of excel may lose initial
    # zero -- must fix this if this script is to work.
    # Lookaround assertions keep us from matching the first six digits
    # of a longer digit string (fixes the "also matches digit string
    # with more than 6 digits" problem noted before).
    if (/[Mm]?(?<!\d)(\d{6})(?!\d)/) {
	$alpha = $1;
	# the map may list the id bare or with an m/M prefix
	if (($newid = $idmap{$alpha}) or
	    ($newid = $idmap{"m" . $alpha}) or
	    ($newid = $idmap{"M" . $alpha}) ){
	    # print "changing $& to $newid\n";
	    s/$&/$newid/;
	} else {
	    # no map entry: generate an id and warn so the map can be extended
	    &munge_id();
	    s/$&/$newid/;
	    print STDERR "no log idmap entry for $& generated $newid\n";
	}
    }
    print;
}

sub munge_id	# reads global $alpha, sets global $newid
{
    $num = $alpha;
    # simple sample mapping function
    $num += 8765;
    # treat new num string as integer and format in hex
    $newid = sprintf("%X", ($num + 0));
}
View
73 LogProcessing/mapid.pl
@@ -0,0 +1,73 @@
#!/usr/bin/perl
#
# mapid -- map list of student ids in input file using external
#          tab-delimited mapping file, generating new id if not found
#          and updating
#
# Usage: mapid.pl mapfile.txt [idfile.txt]
# Reads id list from second argument, standard input if none. Writes
# anonymized list to stdout, updating mapfile with newly generated
# mappings if any. Also writes newly generated pairs to stderr.
#
# Mapfile is tab-delimited list of canonical-name anon-id pairs. It
# should contain a special entry for "PREFIX" specifying the prefix
# to use when generating ids.
#

my $filename = $ARGV[0]; shift @ARGV;
if (! $filename) { die "usage: mapid.pl mapfile.txt [inputfile]\n"; }

# count of ids generated with this prefix
my $id_counter = 1;

# load existing mapping from specified mapping file
open MAPFILE, "<$filename" or die "Couldn't open $filename for reading: $!\n" ;
%idmap = map /(.*)\t([^\r]*)/, <MAPFILE>; # gulp in hash; Allow DOS mode \r at eol
close MAPFILE;
if (1) { # debugging printout; set to 0 to silence
    # NB: keys needs the % sigil -- "keys idmap" is a compile error
    $maplength = (keys %idmap);
    print STDERR "Input mapping ($maplength) entries\n";
    while( ($k, $v) = each %idmap) {
	print STDERR "  |$k| |$v|\n";
    }
    print STDERR "End input mapping\n\n";
}
# ensure file includes prefix. Upper case ensures it can't conflict with
# any canonicalized id.
my $PREFIX_ID = "PREFIX";
($prefix = $idmap{$PREFIX_ID}) or die "missing $PREFIX_ID entry in $filename\n";

# set counter to one more than number of existing entries. Since
# map contains a dummy prefix entry, this will be number of keys
$id_counter = (keys %idmap);
#print STDERR "loaded id map, counter= $id_counter\n";

while (<>)
{
    s/\r$//;	# cygwin perl may include CR from DOS-mode text files
    # ids may have spaces, dots, or odd chars. Assume begin and
    # end with a word character.
    if (/\w.*\w/) {
	# to ignore case differences, use canonical lower case
	# form in mapping table.
	$canon_id = $id = $&;
	$canon_id =~ tr/A-Z/a-z/;
	if (! ($anonid = $idmap{$canon_id})) {
	    $anonid = $idmap{$canon_id} = $prefix . $id_counter++;
	    print STDERR "$canon_id\t$anonid\n";
	    $modified_map = 1;
	}
	# NB: substitute for original, not canonical, form.
	# \Q...\E because ids may contain regex metacharacters (dots etc.)
	s/\Q$id\E/$anonid/;
    }
    print;
}

# update mapping file if mapping has been extended.
if ($modified_map) {
    open MAPFILE, ">$filename" or die "Couldn't open $filename for updating: $!\n";
    while( ($k, $v) = each %idmap) {
	print MAPFILE "$k\t$v\r\n";	# DOS-mode line ends, matching input format
    }
    close MAPFILE;
}
View
123 LogProcessing/mkanon-usna.pl.template
@@ -0,0 +1,123 @@
#!/usr/bin/perl
#
# mkanon -- anonymize USNA userids in Andes log files
#
# Usage: reads one or more logs from standard input, writes to standard
# output
#
# If input files have ids in their names, can write to a concatenated
# output file, then split out from there with splitlogs.
#
# Optional file merge-ids.txt in the current directory can specify
# mapping from actual userids to effective user ids for anonymization.
# This can be used to map non-mid-number ids to mid numbers, or multiple
# different ids to the same id, or numeric ids used by instructors to names.

# old custom merge mapping
#%merge = ( "gossard419" => "m092508",
#           "blindmelon" => "m094098");

if (open MERGEFILE, "<merge-ids.txt") {
    %merge = map /(.*)\t([^\r]*)/, <MERGEFILE>;
    close MERGEFILE;
    #print STDERR "loaded merge map\n";
}

while (<>)
{
    s/\r$//;	# cygwin perl includes CR from DOS-mode text files by default

    # Log header part saying "by JoeSmith on Joes-Computer" can reveal identity
    #     $1                               $2
    if (/^(.*)# Log of Andes session begun (.*) by [\w-]+ on .*$/)
    {
	print "$1# Log of Andes session begun $2 by [user] on [computer]\n";
    }
    #
    # initial set session id call normally contains userid as part
    #        $1                  $2     $3
    elsif (/^(.*)set-session-id "([^"]*)"(.*)$/)
    {
	# Session id form is UserID-MonthDay-Hours-Mins-Secs
	# but any spaces in UserID are converted to hyphens
	# ($id, $rest) = split(/-/, $2, 2);
	@id_parts = split('-', $2);
	$nparts = @id_parts;
	# all but the trailing four time fields form the user id; rejoin
	# with spaces to recover the original (pre-hyphenation) form
	$id = $session_id = join(' ', @id_parts[0 .. $nparts-5]);
	$rest = join('-', @id_parts[$nparts-4 .. $nparts-1]);
	&munge_id();
	print "$1set-session-id \"$newid-$rest\"$3\n";
    }
    #
    # read-student-info call also has user name
    #        $1                     $2     $3
    elsif (/^(.*)read-student-info "([^"]*)"(.*)$/)
    {
	$id = $2;	# session label should start with student id
	if ($id ne $session_id) {
	    print STDERR "warning: student id $id != session id part $session_id!\n";
	}
	&munge_id ();
	print "$1read-student-info \"$newid\"$3\n";
    }
    #
    # kcd urls in hints can contain the user id. Here we just map
    # it anywhere it is found
    #
    elsif ($id && /\Q$id\E/i)
    {
	# was s/$id/$anonid/g, but $anonid is never set anywhere in this
	# script -- munge_id leaves the anonymized id in $newid, so the
	# old code replaced the id with an empty string. Substitute
	# case-insensitively to match the test above, and \Q-quote in
	# case the id contains regex metacharacters.
	s/\Q$id\E/$newid/gi;
	print;
    }

    # !!! Standalone Andes also records interactions with Login dialog
    # should note this and suppress characters while in this box
    else {
	print;
    }
} # end while (<>)

# at end of input, dump idmap so it can be saved
foreach $id (sort keys %idmap) {
    print STDERR "$id\t$idmap{$id}\n";
}


sub munge_id	# reads global $id, sets global $newid
{
    # check if we have noted this as a duplicate account of some student
    $primary_id = $merge{$id} ? $merge{$id} : $id;

    # check if it's a mid number which may occur with or without the initial "m" or "midn"
    # prefix. If not, it may be a teacher or TA log
    # Note: pattern is anchored at beginning but not end of id, so allows trailing cruft in
    # id after proper mid number. This allows for extra letter as in m102340x which was used.
    # But also matches longer digit string as in 26384826 used by instructor McClanahan.
    # Maybe better to anchor at end and handle ids with trailing cruft via merge mechanism.
    if ($primary_id =~ /^(m|mid|midn)?(\d{6})/i)
    {
	# following in case we want to map on substructure: year parts will all
	# be the same within a particular dataset.
	# if ($primary_id =~ /^(m|mid|midn)?([\d][\d])([\d][\d][\d][\d])$/i)
	#    $yr = $2;	 # first two digits are two digit class year: 07, 08, 09 etc.
	#    $snum = $3; # remaining four digits are student number
	$num = $2;

	# simple sample mapping function:
	$num += 8765;

	# treat new num string as integer and format in hex
	$newid = sprintf("%X", ($num + 0));
	# remember mapping
	$idmap{$id} = $newid;
    }
    else {
	# !!! Put alternative method here for non-usna student names
	if (! $warned{$id} ) {
	    print STDERR "Non mid id $id found in $_\n";
	    $warned{$id} = 1;
	}
	$newid = $id;
	$idmap{$id} = $newid;
    }
}
View
144 LogProcessing/mkanon.pl
@@ -0,0 +1,144 @@
#!/usr/bin/perl
#
# mkanon -- anonymize arbitrary userids in Andes log files,
#           reading and updating a mapping file
#
# Usage: mkanon.pl mapfile.txt < logs > newlogs
#
# reads one or more logs from standard input, writing anonymized logs
# to standard output.
#
# Mapping file expected to be generated by mapid2.pl applied to list
# of user names.
#
# This works on a concatenated sequence of logs. If input files are
# separate logs with user ids in their names, can write to a concatenated
# output file, then split individual logs out from there with splitlogs.
#

# Load mapping file
my $filename = $ARGV[0]; shift @ARGV;
if (! $filename) { die "usage: mkanon.pl mapfile.txt < logs > newlogs\n"; }

# count of ids generated with this prefix
my $id_counter = 1;

# load existing mapping from specified mapping file.
# Allow, but don't require, a DOS-mode \r at end of line -- requiring it
# silently produced an empty map when the file was in Unix mode.
open MAPFILE, "<$filename" or die "Couldn't open $filename for reading: $!\n" ;
%idmap = map /(.*)\t([^\r]*)/, <MAPFILE>; # gulps in hash
close MAPFILE;
if (0) { # debugging printout
    # NB: keys needs the % sigil -- "keys idmap" is a compile error
    $maplength = (keys %idmap);
    print STDERR "Input mapping ($maplength) entries\n";
    while( ($k, $v) = each %idmap) {
	print STDERR "  |$k| |$v|\n";
    }
    print STDERR "End input mapping\n\n";
}
# ensure file includes prefix. Upper case ensures it can't conflict with
# any canonicalized id.
my $PREFIX_ID = "PREFIX";
($prefix = $idmap{$PREFIX_ID}) or die "missing $PREFIX_ID entry in $filename\n";

# set counter to one more than number of existing entries. Since
# map contains a dummy prefix entry, this will be number of keys
$id_counter = (keys %idmap);
#print STDERR "loaded id map, counter= $id_counter\n";

# Optional file merge-ids.txt in the current directory can specify
# mapping from actual userids to effective user ids for anonymization.
# This can be used to map non-mid-number ids to mid numbers, or multiple
# different ids to the same id, or numeric ids used by instructors to names.
my %merge;
if (open MERGEFILE, "<merge-ids.txt") {
    %merge = map /(.*)\t([^\s]*)/, <MERGEFILE>;
    close MERGEFILE;
    #print STDERR "loaded merge map\n";
}


while (<>)
{
    # cygwin perl on windows seems to read in the CR from a dos-mode (CRLF-terminated) text file.
    # [Better would be to translate CRLF on read to a logical end of line represented as a single NL.)
    # So strip any trailing CR, to ensure we have a line a string with a trailing logical NL marker only.
    # Note "print" without arguments will print the whole line including the NL, which does seem to
    # translate to a CRLF when done on Windows. When printing a constructed string, we have to include
    # a NL, even if copying a suffix pattern match, since patterns normally don't match the NL.
    s/\r$//;

    # Log header part saying "by JoeSmith on Joes-Computer" can reveal identity
    #     $1                               $2
    if (/^(.*)# Log of Andes session begun (.*) by [\w-]+ on .*$/)
    {
	print "$1# Log of Andes session begun $2 by [user] on [computer]\n";
    }
    # Standalone Andes also records typing in the Login dialog box;
    # blank out whatever was typed there.
    # NOTE(review): replacement drops the space after "C" -- presumably
    # matches the log's own format for this event; confirm against a log.
    elsif (/^(.*)C 1127 (.*)$/) {
	print "$1C1127 ?\n";
    }
    #
    # initial set session id call normally contains userid as part
    #        $1                  $2     $3
    elsif (/^(.*)set-session-id "([^"]*)"(.*)$/)
    {
	# Session id form is UserID-MonthDay-Hours-Mins-Secs
	# but any spaces in UserID are converted to hyphens
	@id_parts = split('-', $2);
	$nparts = @id_parts;
	# all but the trailing four time fields form the user id; rejoin
	# with spaces to recover the original (pre-hyphenation) form
	$id = $session_id = join(' ', @id_parts[0 .. $nparts-5]);
	$rest = join('-', @id_parts[$nparts-4 .. $nparts-1]);
	&munge_id();
	print "$1set-session-id \"$anonid-$rest\"$3\n";
    }
    #
    # read-student-info call also has user name
    #        $1                     $2     $3
    elsif (/^(.*)read-student-info "([^"]*)"(.*)$/)
    {
	$id = $2;	# session label should start with student id
	if ($id ne $session_id) {
	    print STDERR "warning: student id $id != session id part $session_id!\n";
	}
	&munge_id ();
	print "$1read-student-info \"$anonid\"$3\n";
    }
    #
    # Open-Problem can contain user ids in the solution directory path
    #

    #
    # !!! kcd urls in hints can contain the user id
    #
    elsif ($id && /\Q$id\E/i)
    {
	# substitute case-insensitively to match the test above;
	# \Q-quote in case the id contains regex metacharacters
	s/\Q$id\E/$anonid/gi;
	print;
    }
    else {
	print;
    }
} # end while (<>)

# at end of input, rewrite the mapping file if new ids were generated
if ($modified_map) {
    open MAPFILE, ">$filename" or die "Couldn't open $filename for updating: $!\n";
    while( ($k, $v) = each %idmap) {
	print MAPFILE "$k\t$v\r\n";	# DOS-mode line ends, matching input format
    }
    close MAPFILE;
}


sub munge_id	# reads global $id, sets global $anonid
{
    # check if we have noted this as a duplicate account of some student
    $canon_id = $merge{$id} ? $merge{$id} : $id;
    # canonicalize to lower case
    $canon_id =~ tr/A-Z/a-z/;
    if (! ($anonid = $idmap{$canon_id})) {
	# not seen before: mint a new id and note the map needs rewriting
	$anonid = $idmap{$canon_id} = $prefix . $id_counter++;
	print STDERR "$canon_id\t$anonid\n";
	$modified_map = 1;
    }
}
View
21 LogProcessing/mkunitmap.pl
@@ -0,0 +1,21 @@
#!/usr/bin/perl
#
# mkunitmap -- create unit map file from .aps files
#
# Usage: mkunitmap.pl problemset1.aps [problemset2.aps ...] > unitmap.txt
#
# Reads one or more aps files named on the command line (the base name
# of each file supplies the problem-set name, so the files must be
# given as arguments, not piped on standard input), and writes a
# tab-delimited list of problem id, problem set pairs to stdout.
#
# This can be used to generate the mapping file needed by log2xml
use File::Basename;
while (<>)
{
    chomp;
    next if (/ANDES Problem Set/);	# skip problem-set header line
    next if (/\.wmv/);			# skip video entries
    $problem = $_;
    $problem =~ tr/a-z/A-Z/;		# problem ids are upper-case in the map
    $problem =~ s/\r//;			# strip DOS-mode CR if present
    next if $problem eq "";
    # problem set name is the current input file's base name sans extension
    ($setname, $dir, $ext) = fileparse($ARGV, qr/\..*/);
    print "$problem\t$setname\n";
}
View
13 LogProcessing/unescape.pl
@@ -0,0 +1,13 @@
#!/usr/bin/perl
#
# unescape -- copy stdin to stdout, changing CR-slash-n sequences to CR-LF
#
# Needed to handle logs extracted by certain database query which
# returns entire logs as single lines, with newlines escaped, using
# a single newline char between logs:

while (<>) {	# gulps a whole log as a single line with LF at end
    chomp($_);		# remove final NL separating logs
    s/\r\\n/\r\n/g;	# CR bslash n => CR LF used at end of lines
    s/\\t/\t/g;		# bslash t => TAB used after time stamps
    # following occurs in escape sequence inside some hints, e.g. \l, \v, \n
    s/\\\\/\\/g;	# bslash bslash => bslash
    print "$_";
}
View
31 LogProcessing/unmapid-usna.pl
@@ -0,0 +1,31 @@
#!/usr/bin/perl
#
# unmapid -- map anonymized USNA mid ids in input file using
#            external tab-delimited mapping file
#
# changes occurrences of an apparent anonymized mid id, once per line.
# If multiple user ids map to same anonymized id, which one is chosen
# is arbitrary

# load mapping from "idmap.txt" in current working directory.
# Allow, but don't require, a DOS-mode \r at end of line -- the old
# /(.*)\t(.*)\r/ form silently produced an empty map on Unix-mode files.
open MAPFILE, "<idmap.txt" or die "open: $!";
%map = map /(.*)\t([^\r]*)/, <MAPFILE>;
close MAPFILE;
# invert: here we look up by anonymized id to recover the original id
%idmap = reverse %map;

while (<>)
{
    s/\r$//;	# cygwin perl includes CR from DOS-mode text files by default

    # Match any five hex-digit string (anonymized ids are %X-formatted)
    if (/([0-9A-F]{5})/) {
	if ($newid = $idmap{$1}) {
	    # print "changing $& to $newid\n";
	    s/$&/$newid/;
	} else {
	    print STDERR "no log idmap entry for $&\n";
	}
    }
    print;
}
+

0 comments on commit 6965350

Please sign in to comment.