diff --git a/LogProcessing/Info.sample/class-3326.xml b/LogProcessing/Info.sample/class-3326.xml
new file mode 100755
index 000000000..67c219fc8
--- /dev/null
+++ b/LogProcessing/Info.sample/class-3326.xml
@@ -0,0 +1,7 @@
+<class>
+           <name>SP212 General Physics II</name>
+           <school>USNA</school>
+           <period>3326</period>
+	   <description>Electricity and Magnetism Spring 2007</description>
+	   <instructor>Don Treacy</instructor>
+</class>
diff --git a/LogProcessing/Info.sample/class-5526.xml b/LogProcessing/Info.sample/class-5526.xml
new file mode 100755
index 000000000..61e634bfa
--- /dev/null
+++ b/LogProcessing/Info.sample/class-5526.xml
@@ -0,0 +1,7 @@
+<class>
+           <name>SP212 General Physics II</name>
+           <school>USNA</school>
+           <period>5526</period>
+	   <description>Electricity and Magnetism Spring 2007</description>
+	   <instructor>Don Treacy</instructor>
+</class>
diff --git a/LogProcessing/Info.sample/class-6546.xml b/LogProcessing/Info.sample/class-6546.xml
new file mode 100755
index 000000000..c965e7823
--- /dev/null
+++ b/LogProcessing/Info.sample/class-6546.xml
@@ -0,0 +1,7 @@
+<class>
+           <name>SP212 General Physics II</name>
+           <school>USNA</school>
+           <period>6546</period>
+	   <description>Electricity and Magnetism Spring 2007</description>
+           <instructor>Mary Wintersgill</instructor>
+</class>
diff --git a/LogProcessing/Info.sample/classmap.txt b/LogProcessing/Info.sample/classmap.txt
new file mode 100755
index 000000000..110cf03de
--- /dev/null
+++ b/LogProcessing/Info.sample/classmap.txt
@@ -0,0 +1,79 @@
+DE16B	3326
+DE14D	3326
+DE099	3326
+DB8B7	3326
+DDEEF	3326
+090966	3326
+DDC25	3326
+DD961	3326
+DD8DD	3326
+DD7F9	3326
+DD71E	3326
+DD517	3326
+DD457	3326
+DD2BF	3326
+DD2A7	3326
+DCF0B	3326
+live4nectar	3326
+DCE39	3326
+DCD49	3326
+DCD19	3326
+DCD07	3326
+m095652	3326
+DCC4D	3326
+DCC3B	3326
+DC96B	3326
+DE20A	5526
+DE18F	5526
+DDD63	5526
+DDD33	5526
+DD925	5526
+DD64F	5526
+DD60D	5526
+DD54D	5526
+DD45D	5526
+DD3D3	5526
+DD1AB	5526
+DD199	5526
+DD01F	5526
+DCF65	5526
+DCFA1	5526
+DCDA9	5526
+DCB21	5526
+DC9B9	5526
+DC995	5526
+DC8F3	5526
+DC8E7	5526
+DC791	5526
+DC6EF	5526
+DC6E9	5526
+DE24F	6546
+DE22B	6546
+DE147	6546
+DE05D	6546
+DDF07	6546
+DDF01	6546
+DDF01	6546
+DDDFF	6546
+DDD57	6546
+DDD21	6546
+DDC1F	6546
+DDA87	6546
+DD955	6546
+DD8E3	6546
+DD7E7	6546
+DD74B	6546
+DD48D	6546
+DD2DD	6546
+DD193	6546
+DD06D	6546
+DD037	6546
+DCFDD	6546
+DCECF	6546
+DCECF	6546
+DCDCD	6546
+DCBF9	6546
+DCB03	6546
+DDACF	5526
+DD469	3326
+
diff --git a/LogProcessing/Info.sample/condition-control.xml b/LogProcessing/Info.sample/condition-control.xml
new file mode 100755
index 000000000..1866779a2
--- /dev/null
+++ b/LogProcessing/Info.sample/condition-control.xml
@@ -0,0 +1,4 @@
+	<condition>
+		<name>Katz Control</name>
+     		<type>Control</type>
+	</condition>
diff --git a/LogProcessing/Info.sample/condition-experiment1.xml b/LogProcessing/Info.sample/condition-experiment1.xml
new file mode 100755
index 000000000..f1410ce6c
--- /dev/null
+++ b/LogProcessing/Info.sample/condition-experiment1.xml
@@ -0,0 +1,4 @@
+	<condition>
+		<name>Katz Short KCD</name>
+     		<type>Experimental</type>
+	</condition>
diff --git a/LogProcessing/Info.sample/condition-experiment2.xml b/LogProcessing/Info.sample/condition-experiment2.xml
new file mode 100755
index 000000000..412d02184
--- /dev/null
+++ b/LogProcessing/Info.sample/condition-experiment2.xml
@@ -0,0 +1,4 @@
+	<condition>
+		<name>Katz Long KCD</name>
+     		<type>Experimental</type>
+	</condition>
diff --git a/LogProcessing/Info.sample/dataset.txt b/LogProcessing/Info.sample/dataset.txt
new file mode 100755
index 000000000..694fa31c3
--- /dev/null
+++ b/LogProcessing/Info.sample/dataset.txt
@@ -0,0 +1 @@
+USNA Physics Spring 2007
diff --git a/LogProcessing/Info.sample/unitmap.txt b/LogProcessing/Info.sample/unitmap.txt
new file mode 100755
index 000000000..9900e362b
--- /dev/null
+++ b/LogProcessing/Info.sample/unitmap.txt
@@ -0,0 +1,573 @@
+MOMR1A	Angular Momentum
+MOMR1B	Angular Momentum
+MOMR2A	Angular Momentum
+MOMR2B	Angular Momentum
+MOMR3A	Angular Momentum
+MOMR4A	Angular Momentum
+EQCAP1A	Capacitance
+EQCAP1B	Capacitance
+EQCAP1C	Capacitance
+EQCAP1D	Capacitance
+EQCAP2A	Capacitance
+EQCAP2B	Capacitance
+EQCAP3A	Capacitance
+EQCAP3B	Capacitance
+EQCAP4A	Capacitance
+EQCAP4B	Capacitance
+EQCAP5A	Capacitance
+EQCAP6A	Capacitance
+CAP1A	Capacitance
+CAP1B	Capacitance
+CAP2A	Capacitance
+CAP2B	Capacitance
+CAP3A	Capacitance
+CAP4A	Capacitance
+CAP5A	Capacitance
+CAP6A	Capacitance
+CAP6B	Capacitance
+CAP9A	Capacitance
+CAP9B	Capacitance
+ROTS1A	Circular Motion
+ROTS1B	Circular Motion
+ROTS1C	Circular Motion
+ROTS2A	Circular Motion
+ROTS3A	Circular Motion
+ROTS4A	Circular Motion
+ROTS5A	Circular Motion
+ROTS6A	Circular Motion
+ROTS6B	Circular Motion
+ROTS6C	Circular Motion
+ROTS7A	Circular Motion
+ROTS8A	Circular Motion
+ROTS8B	Circular Motion
+KIR1A	DC Circuits
+KIR1B	DC Circuits
+KIR2A	DC Circuits
+KIR3A	DC Circuits
+KIR3B	DC Circuits
+KIR3C	DC Circuits
+KIR4A	DC Circuits
+KIR5A	DC Circuits
+KIR7A	DC Circuits
+EPOW1	DC Circuits
+EPOW2	DC Circuits
+EPOW3	DC Circuits
+EPOW4	DC Circuits
+RC1A	DC Circuits
+RC1B	DC Circuits
+RC1C	DC Circuits
+RC2A	DC Circuits
+RC3A	DC Circuits
+RC3B	DC Circuits
+RC4A	DC Circuits
+RC4B	DC Circuits
+RC5A	DC Circuits
+RC6A	DC Circuits
+RC7A	DC Circuits
+RC7B	DC Circuits
+RC8	DC Circuits
+RC9	DC Circuits
+CHARGE1A	Electric Field
+CHARGE1B	Electric Field
+CHARGE2	Electric Field
+COUL1A	Electric Field
+COUL1B	Electric Field
+COUL1C	Electric Field
+COUL2A	Electric Field
+COUL2B	Electric Field
+COUL2C	Electric Field
+COUL3	Electric Field
+EFIELD1A	Electric Field
+EFIELD1B	Electric Field
+EFIELD1C	Electric Field
+EFIELD1D	Electric Field
+EFIELD1E	Electric Field
+EFIELD2	Electric Field
+EFIELD3	Electric Field
+EFIELD4A	Electric Field
+EFIELD4B	Electric Field
+FOR1A	Electric Field
+FOR1B	Electric Field
+FOR1C	Electric Field
+FOR2A	Electric Field
+FOR2B	Electric Field
+FOR4A	Electric Field
+FOR4B	Electric Field
+FOR5	Electric Field
+FOR7A	Electric Field
+FOR7B	Electric Field
+FOR8A	Electric Field
+FOR8B	Electric Field
+FOR9A	Electric Field
+FOR9B	Electric Field
+FOR10A	Electric Field
+FOR10B	Electric Field
+FOR11A	Electric Field
+FOR11B	Electric Field
+FOR11C	Electric Field
+ELEC1A	Electric Field
+ELEC1B	Electric Field
+ELEC2	Electric Field
+ELEC3B	Electric Field
+ELEC4B	Electric Field
+ELEC5B	Electric Field
+ELEC6B	Electric Field
+GAUSS1	Electric Field
+GAUSS3	Electric Field
+GAUSS4	Electric Field
+GAUSS5	Electric Field
+GAUSS6	Electric Field
+GAUSS8	Electric Field
+GAUSS9	Electric Field
+GAUSS10	Electric Field
+GAUSS11	Electric Field
+DIP1A	Electric Field
+DIP1B	Electric Field
+EPOT1A	Electric Potential
+EPOT1B	Electric Potential
+EPOT1C	Electric Potential
+EPOT2	Electric Potential
+POT1A	Electric Potential
+POT1B	Electric Potential
+POT2A	Electric Potential
+POT2B	Electric Potential
+POT2C	Electric Potential
+POT3A	Electric Potential
+POT3B	Electric Potential
+POT4	Electric Potential
+POT5	Electric Potential
+POT6	Electric Potential
+POT7	Electric Potential
+POT8	Electric Potential
+FARA1A	Electromagnetic Induction
+FARA1B	Electromagnetic Induction
+FARA2A	Electromagnetic Induction
+FARA2B	Electromagnetic Induction
+FARA3A	Electromagnetic Induction
+FARA3B	Electromagnetic Induction
+FARA4A	Electromagnetic Induction
+FARA4B	Electromagnetic Induction
+FARA5A	Electromagnetic Induction
+FARA5B	Electromagnetic Induction
+FARA5C	Electromagnetic Induction
+FARA6A	Electromagnetic Induction
+FARA6B	Electromagnetic Induction
+FARA7A	Electromagnetic Induction
+FARA7B	Electromagnetic Induction
+FARA7C	Electromagnetic Induction
+FARA7D	Electromagnetic Induction
+FARA8A	Electromagnetic Induction
+FARA8B	Electromagnetic Induction
+FARA8C	Electromagnetic Induction
+FARA9	Electromagnetic Induction
+FARA10A	Electromagnetic Induction
+FARA10B	Electromagnetic Induction
+FARA11A	Electromagnetic Induction
+FARA11B	Electromagnetic Induction
+AMP1	Electromagnetic Induction
+EMWAVE1	Electromagnetic Waves
+WAVE19	Electromagnetic Waves
+EMWAVE3A	Electromagnetic Waves
+EMWAVE4	Electromagnetic Waves
+EMWAVE5	Electromagnetic Waves
+E1A	Energy-Work
+E1B	Energy-Work
+E1C	Energy-Work
+E2A	Energy-Work
+E2B	Energy-Work
+E2C	Energy-Work
+E3A	Energy-Work
+E4A	Energy-Work
+E4B	Energy-Work
+E4C	Energy-Work
+E5A	Energy-Work
+E5B	Energy-Work
+E6A	Energy-Work
+E7A	Energy-Work
+E7B	Energy-Work
+E8A	Energy-Work
+E8B	Energy-Work
+E9A	Energy-Work
+E9B	Energy-Work
+E10A	Energy-Work
+E11A	Energy-Work
+WE1A	Energy-Work
+WE2A	Energy-Work
+WE3A	Energy-Work
+WE4A	Energy-Work
+WE5	Energy-Work
+WE6	Energy-Work
+WE8	Energy-Work
+WE9	Energy-Work
+EGRAV1	Energy-Work
+FLUIDS1	Fluids
+FLUIDS2	Fluids
+FLUIDS3	Fluids
+FLUIDS4	Fluids
+FLUIDS5	Fluids
+FLUIDS6	Fluids
+FLUIDS7	Fluids
+FLUIDS8	Fluids
+FLUIDS9	Fluids
+FLUIDS11	Fluids
+FLUIDS12	Fluids
+FLUIDS13	Fluids
+FLUIDS14	Fluids
+FLUIDS15	Fluids
+FBD1A	Free Body Diagrams
+FBD1B	Free Body Diagrams
+FBD2A	Free Body Diagrams
+FBD3A	Free Body Diagrams
+FBD4A	Free Body Diagrams
+FBD5A	Free Body Diagrams
+FBD6A	Free Body Diagrams
+FBD8	Free Body Diagrams
+FBD9	Free Body Diagrams
+IND1A	Inductance
+IND1B	Inductance
+IND1C	Inductance
+IND2A	Inductance
+IND3A	Inductance
+IND3B	Inductance
+IND3C	Inductance
+IND4	Inductance
+LR1A	Inductance
+LR1B	Inductance
+LR1C	Inductance
+LR1D	Inductance
+LR2A	Inductance
+LR2B	Inductance
+LR3A	Inductance
+LR3B	Inductance
+LC1A	Inductance
+LC2A	Inductance
+LC2B	Inductance
+LRC1A	Inductance
+LRC2A	Inductance
+LMOM1A	Linear Momentum
+LMOM1B	Linear Momentum
+LMOM2A	Linear Momentum
+LMOM2B	Linear Momentum
+LMOM3A	Linear Momentum
+LMOM4A	Linear Momentum
+LMOM5	Linear Momentum
+LMOM6	Linear Momentum
+LMOM7	Linear Momentum
+PGRAPH1	Linear Momentum
+PGRAPH2	Linear Momentum
+PGRAPH3	Linear Momentum
+IMP1	Linear Momentum
+IMP2	Linear Momentum
+IMP3A	Linear Momentum
+IMP3B	Linear Momentum
+IMP3C	Linear Momentum
+CM1	Linear Momentum
+CM2	Linear Momentum
+CM3	Linear Momentum
+ROC1	Linear Momentum
+ROC2	Linear Momentum
+ROC3	Linear Momentum
+ROC4	Linear Momentum
+ROC5	Linear Momentum
+ROC6	Linear Momentum
+MAG1A	Magnetic Field
+MAG1B	Magnetic Field
+MAG1C	Magnetic Field
+MAG2A	Magnetic Field
+MAG2B	Magnetic Field
+MAG3A	Magnetic Field
+MAG3B	Magnetic Field
+MAG4A	Magnetic Field
+MAG5A	Magnetic Field
+MAG5B	Magnetic Field
+MAGTOR1A	Magnetic Field
+MAGTOR1B	Magnetic Field
+MAGTOR1C	Magnetic Field
+MAGTOR1D	Magnetic Field
+MAGDIP1	Magnetic Field
+MAGDIP2	Magnetic Field
+MAGDIP3	Magnetic Field
+MAGDIP4	Magnetic Field
+MAG6A	Magnetic Field
+MAG6B	Magnetic Field
+MAG6C	Magnetic Field
+MAG7	Magnetic Field
+MAG8A	Magnetic Field
+MAG8B	Magnetic Field
+MAG9	Magnetic Field
+MAG10	Magnetic Field
+MAG11	Magnetic Field
+MAG12	Magnetic Field
+MIRROR1	Optics
+MIRROR2	Optics
+MIRROR3	Optics
+MIRROR4	Optics
+LENS1A	Optics
+LENS1B	Optics
+LENS2A	Optics
+LENS2B	Optics
+LENS3A	Optics
+LENS3B	Optics
+LENS4A	Optics
+LENS4B	Optics
+LENS5A	Optics
+LENS5B	Optics
+REF1	Optics
+REF2A	Optics
+REF2B	Optics
+REF2C	Optics
+REF3A	Optics
+REF3B	Optics
+REF4A	Optics
+REF4B	Optics
+REF5A	Optics
+REF5B	Optics
+REF6	Optics
+INT1A	Optics
+INT1B	Optics
+INT1C	Optics
+INT1D	Optics
+INT2A	Optics
+INT2B	Optics
+OSC1	Oscillations
+OSC2	Oscillations
+OSC3	Oscillations
+OSC4	Oscillations
+OSC5	Oscillations
+OSC6	Oscillations
+OSC7	Oscillations
+OSC8	Oscillations
+POW1A	Power
+POW1B	Power
+POW2A	Power
+POW3A	Power
+POW4A	Power
+POW4B	Power
+POW5A	Power
+POW5B	Power
+POW5C	Power
+POW5D	Power
+POW6A	Power
+EQRES1A	Resistance
+EQRES1B	Resistance
+EQRES1C	Resistance
+EQRES1D	Resistance
+EQRES1E	Resistance
+EQRES2A	Resistance
+EQRES2B	Resistance
+EQRES3A	Resistance
+EQRES3B	Resistance
+EQRES4A	Resistance
+EQRES4B	Resistance
+EQRES5A	Resistance
+EQRES6A	Resistance
+EQRES7A	Resistance
+EQRES7B	Resistance
+EQRES8A	Resistance
+EQRES8B	Resistance
+DR1A	Rotational Dynamics
+DR2A	Rotational Dynamics
+DR2B	Rotational Dynamics
+DR3A	Rotational Dynamics
+DR4A	Rotational Dynamics
+DR5A	Rotational Dynamics
+DR6A	Rotational Dynamics
+DR6B	Rotational Dynamics
+DR7A	Rotational Dynamics
+DR8A	Rotational Dynamics
+EROT2	Rotational Dynamics
+EROT3	Rotational Dynamics
+EROT4	Rotational Dynamics
+GRAV1	Rotational Dynamics
+GRAV2	Rotational Dynamics
+GRAV3	Rotational Dynamics
+GRAV4	Rotational Dynamics
+GRAV5	Rotational Dynamics
+KR8	Rotational Kinematics
+KR9	Rotational Kinematics
+KR1A	Rotational Kinematics
+KR1B	Rotational Kinematics
+KR1C	Rotational Kinematics
+KR2A	Rotational Kinematics
+KR2B	Rotational Kinematics
+KR3A	Rotational Kinematics
+KR3B	Rotational Kinematics
+KR3C	Rotational Kinematics
+KR4A	Rotational Kinematics
+KR4B	Rotational Kinematics
+KR5A	Rotational Kinematics
+KR6A	Rotational Kinematics
+KR7A	Rotational Kinematics
+S1A	Statics
+S1B	Statics
+S1C	Statics
+S1D	Statics
+S1E	Statics
+S1F	Statics
+S2A	Statics
+S2B	Statics
+S2C	Statics
+S2D	Statics
+S2E	Statics
+S3A	Statics
+S3B	Statics
+S3C	Statics
+S4A	Statics
+S4B	Statics
+S5A	Statics
+S6A	Statics
+S7A	Statics
+S7B	Statics
+S8A	Statics
+S9A	Statics
+S10A	Statics
+S11A	Statics
+S11B	Statics
+S12A	Statics
+S13	Statics
+S14	Statics
+S15	Statics
+S16	Statics
+S17	Statics
+DQ1	Translational Dynamics
+DT1A	Translational Dynamics
+DT1B	Translational Dynamics
+DT1C	Translational Dynamics
+DT2A	Translational Dynamics
+DT3A	Translational Dynamics
+DT3B	Translational Dynamics
+DT3C	Translational Dynamics
+DT4A	Translational Dynamics
+DT4B	Translational Dynamics
+DT5A	Translational Dynamics
+DT6A	Translational Dynamics
+DT6B	Translational Dynamics
+DT6C	Translational Dynamics
+DT7A	Translational Dynamics
+DT7B	Translational Dynamics
+DT8A	Translational Dynamics
+DT9A	Translational Dynamics
+DT10A	Translational Dynamics
+DT11A	Translational Dynamics
+DT11B	Translational Dynamics
+DT12A	Translational Dynamics
+DT13A	Translational Dynamics
+DT13B	Translational Dynamics
+DT14A	Translational Dynamics
+DT14B	Translational Dynamics
+DT16	Translational Dynamics
+DT17	Translational Dynamics
+DT18	Translational Dynamics
+DT19	Translational Dynamics
+KT1A	Translational Kinematics
+KT1B	Translational Kinematics
+KT2A	Translational Kinematics
+KT2B	Translational Kinematics
+KT3A	Translational Kinematics
+KT3B	Translational Kinematics
+KT4A	Translational Kinematics
+KT4B	Translational Kinematics
+KT5A	Translational Kinematics
+KT6A	Translational Kinematics
+KT6B	Translational Kinematics
+KT7A	Translational Kinematics
+KT7B	Translational Kinematics
+KT8A	Translational Kinematics
+KT8B	Translational Kinematics
+KT9A	Translational Kinematics
+KT9B	Translational Kinematics
+KT10A	Translational Kinematics
+KT10C	Translational Kinematics
+KT11A	Translational Kinematics
+KT11B	Translational Kinematics
+KT12A	Translational Kinematics
+KT12B	Translational Kinematics
+KT12C	Translational Kinematics
+KT13A	Translational Kinematics
+KT13B	Translational Kinematics
+KT13C	Translational Kinematics
+KT14A	Translational Kinematics
+KT14B	Translational Kinematics
+KGRAPH1	Translational Kinematics
+KGRAPH2	Translational Kinematics
+KGRAPH3	Translational Kinematics
+KGRAPH4	Translational Kinematics
+KGRAPH5	Translational Kinematics
+KGRAPH6	Translational Kinematics
+KGRAPH7	Translational Kinematics
+KGRAPH8	Translational Kinematics
+KGRAPH9	Translational Kinematics
+KGRAPH10	Translational Kinematics
+KGRAPH11	Translational Kinematics
+KGRAPH12	Translational Kinematics
+KGRAPH13	Translational Kinematics
+KGRAPH14	Translational Kinematics
+KGRAPH16	Translational Kinematics
+KGRAPH17	Translational Kinematics
+KGRAPH18	Translational Kinematics
+KGRAPH19	Translational Kinematics
+KGRAPH20	Translational Kinematics
+KGRAPH21	Translational Kinematics
+VEC1A	Vectors
+VEC1B	Vectors
+VEC1C	Vectors
+VEC1D	Vectors
+VEC2A	Vectors
+VEC2B	Vectors
+VEC2C	Vectors
+VEC2D	Vectors
+VEC3A	Vectors
+VEC3B	Vectors
+VEC3C	Vectors
+VEC4A	Vectors
+VEC4B	Vectors
+VEC4C	Vectors
+VEC4D	Vectors
+VEC5A	Vectors
+VEC5B	Vectors
+VEC5C	Vectors
+VEC5D	Vectors
+VEC6A	Vectors
+VEC6B	Vectors
+VEC6C	Vectors
+VEC6D	Vectors
+VEC7A	Vectors
+VEC8A	Vectors
+VEC8B	Vectors
+VEC8C	Vectors
+VEC9	Vectors
+RELVEL1A	Vectors
+RELVEL2A	Vectors
+RELVEL3A	Vectors
+MOT1	Vectors
+MOT2	Vectors
+MOT3	Vectors
+MOT4	Vectors
+WAVE1	Waves
+WAVE2	Waves
+WAVE3	Waves
+WAVE4	Waves
+WAVE5	Waves
+WAVE6	Waves
+WAVE8	Waves
+WAVE9	Waves
+WAVE10	Waves
+WAVE11	Waves
+WAVE12	Waves
+WAVE13	Waves
+WAVE14	Waves
+WAVE15	Waves
+WAVE16	Waves
+WAVE17	Waves
+WAVE18	Waves
+WAVE24	Waves
+FOR4A	Electric Field
+FOR4B	Electric Field
+ELEC1A	Electric Field
+FOR4C	Electric Field
+ELEC2	Electric Field
+ELEC3A	Electric Field
+ELEC4A	Electric Field
+ELEC5A	Electric Field
+ELEC6A	Electric Field
+ELEC7A	Electric Field
\ No newline at end of file
diff --git a/LogProcessing/OLI Log Processing.html b/LogProcessing/OLI Log Processing.html
new file mode 100755
index 000000000..675bb6d2f
--- /dev/null
+++ b/LogProcessing/OLI Log Processing.html	
@@ -0,0 +1,357 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+  <meta content="text/html; charset=ISO-8859-1"
+ http-equiv="content-type">
+  <title>ANDES Log Processing</title>
+</head>
+<body>
+<h1>Processing ANDES Logs from OLI</h1>
+<br>
+This document explains how to obtain Andes log sets from OLI, anonymize
+them, and convert them into datashop format. All scripts for dealing
+with log files can be found in the LogProcessing directory of the Andes
+tree.<br>
+<br>
+<h2>Obtaining Logs from OLI</h2>
+OLI logs can be retrieved from the OLI QA server,
+oli-qa.andrew.cmu.edu, using OLI's <span style="font-style: italic;">Data
+Extraction tool.</span> OLI will have to give you an account and
+configure your access to the data extraction tool, after which a link
+for it will show up on the top right of your start page when
+you log in to OLI.<br>
+<br>
+Logs on the QA server are mirrors of those on the OLI production
+server. These mirrors are made periodically, typically when QA software
+is updated and at other irregular intervals. So, they typically lag the
+data on production by a few weeks. It is not allowed to retrieve logs
+from the production server since the process can overload the server. <br>
+<br>
+For courses hosted on the PSLC server, the Data Extraction tool may be
+made available on the live server at any time.<br>
+<br>
+<h2>Running the Data Extraction tool</h2>
+After clicking the Data Extraction tool, you are walked through a
+series of screens to define a query. You should make the following
+selections:<br>
+<br>
+&nbsp;&nbsp; Step 1: Select scope of data<br>
+&nbsp;&nbsp;&nbsp;&nbsp; Go down to "Course Sections" and select the
+course sections you want. The available ones for you should be
+highlighted. OLI may have to configure your access to particular course
+logs.
+Otherwise you will see them but not be able to select them in the Data
+Extraction tool interface. <br>
+&nbsp;&nbsp;&nbsp; Do not select anything else on this page. You do not
+normally want to select by content package, for example.<br>
+&nbsp;&nbsp;&nbsp; Click Go to Step 2<br>
+<br>
+&nbsp;&nbsp;&nbsp; Step 2: Options<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; <span
+ style="font-weight: bold;">Columns</span>: This selects which columns
+from the OLI log database will be included in the output in a
+comma-separated format. The essential one is the "info" column. The
+info column records application-specific information. In our
+case,&nbsp; there is a single row in the OLI log database corresponding to
+the event of Andes uploading a log file at the end of an Andes problem
+session. This row includes the entire text of an Andes session logfile
+in the "info" field, so info should always remain checked. <br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; One should also check the "Action
+TimeStamp"&nbsp; to receive the server's time at the time the log was
+made. This is useful to have because the date and time shown<span
+ style="font-style: italic;"> inside</span> the Andes logs is derived
+from the user's system clock, which may be incorrectly set. So the OLI
+recorded event time is more reliable to have. This time should be very
+close to the time of the <span style="font-style: italic;">end </span>of
+the Andes session, when the log file was uploaded. <br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp; One may check the Action
+Time Zone for a more complete specification about the time.
+However,&nbsp; since this event is recorded by the OLI server, this
+does not really add information -- the times here should always be in
+the Eastern time zone where the OLI servers are, even if the students
+are working in different time zones. <br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; All other columns should
+be unchecked to keep the output simple.<br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Notes: In theory
+there might be some use for the User Id column, which will show an
+anonymized OLI user id (into a long ugly GUID), or the Session GUID,
+which identifies an OLI login session. This detail could be used to
+correlate Andes logs with other OLI, non-ANDES log entries made by the
+same student or within the same OLI login session. For example, one can
+also use the Data Extraction tool to retrieve logs showing access to
+learning pages in the course. With that result, one could determine
+which learning pages the student visited in the same session as the
+Andes log was made.&nbsp; However, we have never made any use of this
+information up to now. <br>
+&nbsp;&nbsp;&nbsp; <br>
+&nbsp;&nbsp;&nbsp; The question has sometimes come up as to whether we
+can determine when students view training videos on OLI from their
+logs. Right now, there is no record made in the OLI log database of
+these events, because they are just serves of&nbsp; webcontent that do
+not pass through the OLI courseware system. One way around this would be
+to wrap each video in its own learning page. In this case there <span
+ style="font-style: italic;">would </span>be a log of those learning
+page accesses. However, that would not show whether the student had
+launched the video or not, merely whether they had opened the page on
+which it resides. Another possible way might be to convert the videos
+to Flash format, and make use of Flash media logging support built into
+certain OLI tools. This would log media viewing events in the
+DataShop's xml notation, and these events could then be retrieved.<br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; <span
+ style="font-weight: bold;">Actions</span>: Select Andes. This is a
+custom action which indicates uploading an Andes log at the end of an
+Andes problem session.<br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; <span
+ style="font-weight: bold;">Filters</span>: leave blank<br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; <span
+ style="font-weight: bold;">Options</span>:&nbsp; Normally one should
+select a date range which includes all logs in the course but exclude
+other semesters that might contain the same student. The dates do
+not&nbsp; have to be exact . For example, for a Spring term 2007
+course, it could suffice to select a range from 2007-01-01 to
+2007-06-01, even if the exact course date range was somewhat narrower. <br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; The reason for this is
+that OLI does not actually record course information in the log
+database. Rather, when you request logs for a course, the Data
+Extraction server looks up the course roster and translates this into a
+query for logs by all students on that roster. If a student was in both
+a fall term and spring term course, this will retrieve logs from
+outside of the course.&nbsp; <br>
+<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; All other options should
+be left as defaults.<br>
+<br>
+At Step 3, the query is submitted and you should start downloading a
+zip file named data.zip. However, this step often times out or fails if
+the server is heavily
+loaded. In that case you just have to try again at a different time.<br>
+<br>
+&nbsp;&nbsp;&nbsp; The data.zip itself contains a single file named
+actions.csv, with all the data. Save this zip file with some
+identifying name like Fall2007-USNA.dat.z <br>
+<h4>Concatenated log format</h4>
+&nbsp;&nbsp;&nbsp; The embedded file you get is formally a csv file
+showing selected rows in a database, like a
+spreadsheet. The first line gives the column headings and subsequent
+lines give comma-separated values. As noted above, the
+last "column" of each line is the full Andes log, which normally
+includes multiple text lines within it.&nbsp; Effectively that makes
+this file a concatenated sequence of Andes logs, with a
+little bit of cruft before the initial log header line. <br>
+<br>
+&nbsp;&nbsp;&nbsp; For most purposes the concatenated file can be
+processed as a unit. By using zcat and piping output to further
+processing, it may not even be necessary to uncompress it. However, it
+could also be split into separate log files for processing
+-- see the splitlogs.pl tool in the Andes LogProcessing directory. <br>
+<br>
+&nbsp;&nbsp;&nbsp; Within the concatenated log, individual session logs
+can be found by searching for the Andes log header line, which will
+have the following format:<br>
+<br>
+2008/04/03 18:15:21,Eastern,# Log of Andes session begun Thursday,
+April 03, 2008 18:14:50 by [user] on [computer]<br>
+<br>
+&nbsp;&nbsp;&nbsp; Individual logs should each end with an END-LOG line<br>
+1:00&nbsp;&nbsp;&nbsp; END-LOG <br>
+&nbsp;&nbsp;&nbsp; However, in theory an error could prevent the
+END-LOG statement from getting into the log, so it is safest just to
+end a log at the header of the next one.<br>
+<br>
+&nbsp;&nbsp;&nbsp;&nbsp; Technically this file is in Unix text file
+format, in which there is a single newline character at the end of
+each row from the database. However, the last column of every row
+after the first (header) line ends with the full text of an Andes log
+file, and an Andes log file is itself a multi-line Windows-format&nbsp;
+text file in which the lines end with carriage-return linefeed pairs.
+One effect of this is that when viewed in a text editor, there will
+appear to be a blank line after each log file. <br>
+<br>
+&nbsp;&nbsp; <br>
+<h2><span style="font-weight: bold;">Anonymizing the logs</span></h2>
+<h2><span style="font-weight: bold;"></span></h2>
+<br>
+There are different scripts for anonymizing USNA logs, where our
+students are supposed to use their alphas, aka mid numbers, as user
+ids, and anonymizing arbitrary student logs, where the ids may take any
+form.<br>
+<h5>Anonymizing USNA midshipmen logs: mkanon-usna.pl<br>
+</h5>
+To anonymize mid logs, obtain the file mkanon-usna.pl.template. Edit it
+to include a different hash function in the munge_id routine to map the
+mid number into some anonymization code. Rename it to mkanon-usna.pl.
+The basic way to anonymize is then:<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &nbsp;&nbsp; zcat Fall2007-USNA.dat.z |
+perl mkanon-usna.pl&nbsp; &gt; Fall2007-USNA-anon.dat 2&gt; idmap.txt<br>
+<br>
+The standard output will be the anonymized concatenated log
+(uncompressed). The stderr output will contain any error messages, of
+which the most important are reports of ids that do not conform to the
+mid number pattern. This will be followed by a tab-delimited listing of
+the id mapping file generated. This mapping should be saved for reference
+and for use in applying the mapping to other files. So you should
+inspect idmap first, and edit out any error message before saving it.
+[Maybe better to just write idmap.txt file separately from error
+messages? ]<br>
+<br>
+A nuisance is that sometimes the same real student will have created
+two or more different OLI ids. Also, a student may not have used a mid
+number at all as an id. Normally we ask instructors to grant us TA
+access to courses we
+support so we can inspect the gradebook and rosters, so the usual way
+to identify such students is to inspect the OLI section rosters, which
+will show students with the same real name on different lines. <br>
+<br>
+Both these issues can be handled by creating a file named merge-ids.txt
+in the directory in which the script is run. This should be a
+tab-separated two-column listing mapping login ids to canonical&nbsp;
+user ids: instances of the first id will be mapped to the second before
+anonymizing. The second id need never have been actually used, e.g. for
+someone who used a non-mid number id, you could just map it to a mid
+number. <br>
+<br>
+Note the anonymization will already merge students who used two user
+ids consisting of a mid number with and without an initial "m" since it
+anonymizes based on the mid number itself. So no special entry is
+required for these.<br>
+<h6>Applying the id map to other files: mapid-usna.pl<br>
+</h6>
+The mapping defined in the saved file idmap.txt can be applied to other
+text files, e.g. a list of questionnaire respondents or students in some
+experimental condition, by using the script mapid-usna.pl.<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; mapid-usna.pl &lt; infile &gt; outfile<br>
+This script looks for a file named idmap.txt in the current directory.
+This also should be customized from the template file to include the
+same mapping function used in mkanon-usna. This will apply the map to
+mid numbers in the input.&nbsp; If an id is not found in the map, it will use
+the hashing algorithm to generate one, generating an error message. The
+advantage of&nbsp; re-using the log-generated mapping file is that it
+tells us when it encounters a user id for which no logs were found in
+the set. This may indicate an anomaly requiring investigation, e.g. a
+questionnaire respondent who entered his user id incorrectly.<br>
+<h5>Anonymizing arbitrary logs</h5>
+Student ids in non-USNA datasets can be anonymized with the mkanon.pl
+and mapid.pl scripts. These don't use a hashing algorithm. Rather, they
+simply generate ids by adding numbers to a specified prefix. To use
+these, create a&nbsp; tab-separated mapping file (of any name)
+initialized to contain a special mapping for PREFIX, e.g.<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; PREFIX\tWH081<br>
+Then do<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; perl mkanon.pl
+mapfile.txt &lt; log &gt; newlog<br>
+where mapfile.txt is the mapping file. This will update mapfile.txt
+with the new mapping file. <br>
+<br>
+This can be done multiple times; in each case, an existing mapping will
+be used if found, and the mapping file will be updated with any new
+entries at the end.<br>
+<br>
+As above, the mapping can be applied to arbitrary text by<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; perl mapid.pl mapfile.txt &lt; old &gt;
+new<br>
+<br>
+<h2>Converting Raw Logs to DataShop Format</h2>
+The script log2xml.pl can be used to convert an anonymized dataset into
+files in the xml format the DataShop uses for import into their
+database. By the datashop's request, this will generate one folder per
+student, and one xml log file per session log. At the end of the
+conversion, this should be zipped up and delivered to the DataShop for
+dropoff. The DataShop should give you ssh access to a dropbox location
+on their "cooker" server for this purpose.<br>
+<h5>Info files</h5>
+Although the basic conversion is simple, the converter makes use of
+several external files to patch in information not included in the
+Andes raw logs. It will look for them in a subdirectory named "Info"
+within its working directory when run. <br>
+<br>
+Required:<br>
+&nbsp;&nbsp; dataset.txt -- contains dataset name <br>
+Optional:<br>
+&nbsp;&nbsp; classmap.txt -- student to class id mapping. <br>
+&nbsp;&nbsp; class-XXX.xml -- class XML element for class w/id XXX<br>
+&nbsp;&nbsp; conditionmap.txt -- student to condition id mapping<br>
+&nbsp;&nbsp; condition-XXX.xml -- condition element for id XXX<br>
+&nbsp;&nbsp; unitmap.txt&nbsp; -- problem to unit mapping<br>
+<br>
+The dataset.txt file must exist. All others are optional; if not found,
+this info will not be included in the conversion.<br>
+<br>
+Samples of these files can be viewed in the directory Info.sample in
+the LogProcessing directory. <br>
+<h6>Class information</h6>
+The classmap file is a two-column tab-separated mapping of student
+names to some string identifying the class they were in. This could be
+a section number like "3346" or it could just be an instructor name
+like "gershmann". The class-XXX.xml then gives the xml element to patch
+into the conversion for specifying the student's class.&nbsp; For
+example:<br>
+<br>
+&lt;class&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+&lt;name&gt;SP212 General Physics II&lt;/name&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+&lt;school&gt;USNA&lt;/school&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+&lt;period&gt;6546&lt;/period&gt;<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &lt;description&gt;Electricity and
+Magnetism Spring 2007&lt;/description&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+&lt;instructor&gt;Mary Wintersgill&lt;/instructor&gt;<br>
+&lt;/class&gt;<br>
+<br>
+If all this information is not known, then it need not be included. <br>
+<br>
+This information must be obtained from OLI rosters or other sources
+from the instructor or experimenter. One way is to build a spreadsheet
+or list, cut the column of real names and save to a file, anonymize
+that file, and paste it back into the spreadsheet, then save in
+tab-delimited format.<br>
+<h6>Condition information</h6>
+Experimental condition information may come from two sources: it may be
+included in the logs, if OLI had been customized for different
+conditions, as in Sandy Katz's USNA experiments. In this case the
+condition will always be one of "control", "experiment", "experiment1"
+or "experiment2", which are the only possible values defined in our OLI
+course. In this case no conditionmap file is needed.<br>
+<br>
+Alternatively, the condition information may be patched in via a
+conditionmap.txt file, which maps student ids to experiment condition
+ids. This is more common for logs from lab studies. <br>
+<br>
+Again, the ids used here are arbitrary. The xml element
+condition-XXX.xml will be patched into the converted log to explain the
+student's experiment condition. A sample is<br>
+<br>
+&nbsp;&nbsp;&nbsp; &lt;condition&gt;<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &lt;name&gt;Katz Reflective
+Dialogue&lt;/name&gt;<br>
+&nbsp;&nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
+&lt;type&gt;Experimental&lt;/type&gt;<br>
+&nbsp;&nbsp;&nbsp; &lt;/condition&gt;<br>
+<br>
+Note a conditionmap will take priority over conditions found in the
+logs. It is not required to include any condition information.<br>
+<br>
+I believe the datashop format does not support multiple conditions in a
+log, even though a USNA student might have participated in multiple
+experiments, say a lab study and also a longer study like Sandy Katz's.
+<br>
+<h6>Unit information</h6>
+The converter also makes use of a mapping from problem id to problem
+set, e.g. kt1 -&gt; Translational Kinematics, since this information is
+not in the logs. This is contained in the file unitmap.txt. This file
+can be generated from a set of aps files by the script&nbsp;
+mkunitmap.pl as<br>
+&nbsp;&nbsp;&nbsp; &nbsp;&nbsp; mkunitmap.pl *.aps &gt; unitmap.txt<br>
+<br>
+&nbsp;It changes rarely so it can usually just be copied. <br>
+<br>
+<br>
+<br>
+</body>
+</html>
diff --git a/LogProcessing/log2xml.pl b/LogProcessing/log2xml.pl
index 633e2cde6..374eb069a 100755
--- a/LogProcessing/log2xml.pl
+++ b/LogProcessing/log2xml.pl
@@ -1,3 +1,4 @@
+#!/usr/bin/perl
 #####################################################################
 # log2xml -- convert ANDES log files into PSLC Datashop XML format
 #
@@ -10,7 +11,8 @@
 # output file is found in:
 #              student-id/session-id.xml
 #
-# Uses the following files from its working directory:
+# Looks for the following supporting files in the Info subdirectory of 
+# its current working directory:
 # Required:
 #   dataset.txt -- contains dataset name 
 # Optional:
@@ -27,10 +29,14 @@
 # provide only the single file class.xml
 #
 # Condition ids may come from set-condition statement in log
-# or from condition-map. There may be no conditions to set.
+# or from condition-map, with condition-map taking precedence. 
+# There may be no conditions to set.
+#
+# The unitmap file needs to change only if new problems are added.
+# It can be generated by the mkunitmap.pl script.
 #
 ######################################################################
-my $revision_string = '$Revision: 1.10 $';
+my $revision_string = '$Revision: 1.11 $';
 
 # globals for current log line
 my ($timestamp, $event, $argstr); 
@@ -114,7 +120,7 @@
 
 # load the problem to unit mapping table, if it exists
 if (open UNITMAP, "<Info/unitmap.txt") {
-     %unitmap = map /(.*)\t(.*)\r/, <UNITMAP>;
+     %unitmap = map /(.*)\t([^\r\n]*)/, <UNITMAP>;  # value stops at CR or LF
      close UNITMAP;
      $have_unitmap = 1;
      #print STDERR "loaded unit map\n";
@@ -124,7 +130,7 @@
 # we may get a condition from a conditionmap file, or else from set-condition
 # above. If both are set, condition map will override.
 if (open CONDITIONMAP, "<Info/conditionmap.txt") {
-	%conditionmap = map /(.*)\t(.*)\r/, <CONDITIONMAP>;
+	%conditionmap = map /(.*)\t([^\r\n]*)/, <CONDITIONMAP>;  # value stops at CR or LF
-        close CONDITIONS;
+        close CONDITIONMAP;
 	$have_conditionmap = 1;
      #print STDERR "loaded condition map\n";
@@ -132,7 +138,7 @@
 
 # load the student to class mapping table, if it exists
 if (open CLASSMAP, "<Info/classmap.txt") {
-	%classmap = map /(.*)\t(.*)\r/, <CLASSMAP>;
+	%classmap = map /(.*)\t([^\r\n]*)/, <CLASSMAP>;  # value stops at CR or LF
         close CLASSMAP;
 	$have_classmap = 1;
 }
@@ -142,7 +148,7 @@
 #--------------------------------------------------------------------------------
 while (<>) {
     chomp($_); 
-    s/\r$//;   # delete dangling CR's remaining from Unix to DOS conversion
+    s/\r$//;   # cygwin PERL includes DOS-mode CR's by default, see PERLIO
 
     # Log header line begins a new log. We could reset and get date from this, but
     # in fact we get it from initial set-session-id call below. However, need to 
@@ -282,6 +288,11 @@
            $unit_level_begin = "<level type=\'module\'><name>$unit</name>";
 	   $unit_level_end = "</level>";
       } else { $unit_level_begin = $unit_level_end = "" };
+      # AW: group level is only defined to make it easy to select, say,
+      # all problems named CM*, in the datashop by selecting that group. 
+      # But datashop might let you do this anyway, even without the group level,
+      # just by using a pattern match on the problem name.  If so, then there 
+      # is not much point in including this.
       # try to include a group level using name prefix, if we find one 
       # e.g. cm1a => CM*, roc2a => ROC*, etc.
       $group = $problem;   # default if name doesn't include a number
diff --git a/LogProcessing/mapid-usna.pl.template b/LogProcessing/mapid-usna.pl.template
new file mode 100755
index 000000000..c39174de3
--- /dev/null
+++ b/LogProcessing/mapid-usna.pl.template
@@ -0,0 +1,54 @@
+#!/usr/bin/perl
+#
+# mapid -- map USNA mid ids in input file using external tab-delimited mapping
+#          file
+#
+# Copies the files named on the command line (standard input if none) to
+# standard output, changing an apparent mid id once per line.
+# An apparent mid id is any six digit number optionally preceded by an "m" or "M".
+#
+# The mapping comes from "idmap.txt" in the current working directory, one
+# tab-delimited pair per line with DOS (CRLF) or Unix (LF) line endings.
+# An id that has no entry there is given a generated id by munge_id(), and
+# a note about it is written to standard error.
+
+use strict;
+use warnings;
+
+# load mapping from "idmap.txt" in current working directory
+open my $mapfh, '<', 'idmap.txt' or die "open: $!";
+# The value excludes both CR and LF: with [^\r]* alone, a map file with Unix
+# line endings would leave a newline on the end of every new id.
+my %idmap = map /(.*)\t([^\r\n]*)/, <$mapfh>;
+close $mapfh;
+
+while (<>) {
+    s/\r$//; # cygwin perl includes CR from DOS-mode text files by default
+
+    # Match any six digit number, optionally preceded by an "m".
+    # Note numeric ids coming out of excel may lose initial
+    # zero -- must fix this if this script is to work.
+    # !!! also matches digit string with more than 6 digits -- fix?
+    if (/([Mm]?(\d{6}))/) {
+        my ($found, $digits) = ($1, $2);
+
+        # the map may list the id bare or with either case of "m" prefix
+        my $newid = $idmap{$digits} || $idmap{"m$digits"} || $idmap{"M$digits"};
+        if (! $newid) {
+            $newid = munge_id($digits);
+            print STDERR "no log idmap entry for $found generated $newid\n";
+        }
+        s/\Q$found\E/$newid/;
+    }
+    print;
+}
+
+# munge_id -- generate an id for a six digit mid number that has no map entry.
+# Takes the digit string and returns the new id as an upper-case hex string.
+sub munge_id {
+    my ($digits) = @_;
+    # simple sample mapping function
+    my $num = $digits + 8765;
+    # format the shifted number in hex
+    return sprintf("%X", $num);
+}
diff --git a/LogProcessing/mapid.pl b/LogProcessing/mapid.pl
new file mode 100755
index 000000000..dc27a785f
--- /dev/null
+++ b/LogProcessing/mapid.pl
@@ -0,0 +1,81 @@
+#!/usr/bin/perl
+#
+# mapid --  map list of student ids in input file using external
+#           tab-delimited mapping file, generating a new id for any id
+#           not found and updating the mapping file to include it
+#
+# Usage:   mapid.pl mapfile.txt [idfile.txt]
+# Reads id list from second argument, standard input if none. Writes
+# anonymized list to stdout, updating mapfile with newly generated
+# mappings if any. Also writes newly generated pairs to stderr.
+#
+# Mapfile is tab-delimited list of canonical-name anon-id pairs. It
+# should contain a special entry for "PREFIX" specifying the prefix
+# to use when generating ids. Its lines may end in CRLF or in LF alone.
+#
+
+use strict;
+use warnings;
+
+my $filename = shift @ARGV;
+if (! $filename) { die "usage: mapid.pl mapfile.txt [inputfile]\n"; }
+
+# set to 0 to suppress the dump of the loaded mapping on stderr
+my $debug = 1;
+
+# load existing mapping from specified mapping file
+open my $mapfh, '<', $filename or die "Couldn't open $filename for reading: $!\n";
+# gulp in hash; the value excludes CR and LF so either line ending works
+my %idmap = map /(.*)\t([^\r\n]*)/, <$mapfh>;
+close $mapfh;
+if ($debug) {
+    my $maplength = keys %idmap;
+    print STDERR "Input mapping ($maplength) entries\n";
+    foreach my $k (keys %idmap) {
+        print STDERR "  |$k| |$idmap{$k}|\n";
+    }
+    print STDERR "End input mapping\n\n";
+}
+
+# ensure file includes prefix. Upper case ensures it can't conflict with
+# any canonicalized id.
+my $PREFIX_ID = "PREFIX";
+my $prefix = $idmap{$PREFIX_ID} or die "missing $PREFIX_ID entry in $filename\n";
+
+# set counter to one more than number of existing entries. Since
+# map contains a dummy prefix entry, this will be number of keys
+my $id_counter = keys %idmap;
+
+# set once a new mapping is generated, so that the map file gets rewritten
+my $modified_map = 0;
+
+while (<>) {
+    s/\r$//;  # cygwin perl may include CR from DOS-mode text files
+    # ids may have spaces, dots, or odd chars. Assume begin and
+    # end with a word character; a lone word character is an id too.
+    if (/(\w(?:.*\w)?)/) {
+        my $id = $1;
+        # to ignore case differences, use canonical lower case
+        # form in mapping table.
+        my $canon_id = lc $id;
+        my $anonid = $idmap{$canon_id};
+        if (! $anonid) {
+            $anonid = $idmap{$canon_id} = $prefix . $id_counter++;
+            print STDERR "$canon_id\t$anonid\n";
+            $modified_map = 1;
+        }
+        # NB: substitute for original, not canonical, form. The id is
+        # quoted because it may contain regex metacharacters such as "."
+        s/\Q$id\E/$anonid/;
+    }
+    print;
+}
+
+# update mapping file if mapping has been extended.
+if ($modified_map) {
+    open my $outfh, '>', $filename or die "Couldn't open $filename for updating: $!\n";
+    foreach my $k (sort keys %idmap) {
+        print {$outfh} "$k\t$idmap{$k}\r\n";
+    }
+    close $outfh or die "Couldn't finish writing $filename: $!\n";
+}
diff --git a/LogProcessing/mkanon-usna.pl.template b/LogProcessing/mkanon-usna.pl.template
new file mode 100755
index 000000000..efc8726f0
--- /dev/null
+++ b/LogProcessing/mkanon-usna.pl.template
@@ -0,0 +1,140 @@
+#!/usr/bin/perl
+#
+# mkanon -- anonymize USNA userids in Andes log files
+#
+# Usage:   reads one or more logs from standard input, writes to standard
+#          output
+#
+# If input files have ids in their names, can write to a concatenated
+# output file, then split out from there with splitlogs.
+#
+# Optional file merge-ids.txt in the current directory can specify
+# mapping from actual userids to effective user ids for anonymization.
+# This can be used to map non-mid-number ids to mid numbers, or multiple
+# different ids to the same id, or numeric ids used by instructors to names.
+#
+# At end of input the mapping from each userid seen to the id that replaced
+# it is written to standard error as tab-delimited pairs, so it can be saved.
+
+use strict;
+use warnings;
+
+# old custom merge mapping
+#%merge = ( "gossard419" => "m092508",
+#            "blindmelon" => "m094098");
+
+my %merge;   # actual userid => effective userid, loaded from merge-ids.txt
+my %idmap;   # userid => replacement id, for every userid processed
+my %warned;  # userids already reported as not being mid numbers
+
+if (open my $mergefh, '<', 'merge-ids.txt') {
+    # value excludes CR and LF, so DOS and Unix line endings both work
+    %merge = map /(.*)\t([^\r\n]*)/, <$mergefh>;
+    close $mergefh;
+    #print STDERR "loaded merge map\n";
+}
+
+my $id;               # userid found in the log currently being copied
+my $newid;            # replacement for $id
+my $session_id = '';  # userid part of the most recent session id
+
+while (<>) {
+    s/\r$//; # cygwin perl includes CR from DOS-mode text files by default
+
+    # Log header part saying "by JoeSmith on Joes-Computer" can reveal identity.
+    # The user name is matched loosely, since a login name containing a space
+    # or a dot would otherwise be copied through unchanged.
+    #      $1                               $2
+    if (/^(.*)# Log of Andes session begun (.*?) by .* on .*$/) {
+        print "$1# Log of Andes session begun $2 by [user] on [computer]\n";
+    }
+    #
+    # initial set session id call normally contains userid as part
+    #         $1                   $2     $3
+    elsif (/^(.*)set-session-id "([^"]*)"(.*)$/) {
+        my ($before, $session, $after) = ($1, $2, $3);
+        # Session id form is UserID-MonthDay-Hours-Mins-Secs
+        # but any spaces in UserID are converted to hyphens
+        my @id_parts = split('-', $session);
+        my $nparts = @id_parts;
+        $id = $session_id = join(' ', @id_parts[0 .. $nparts-5]);
+        my $rest = join('-', @id_parts[$nparts-4 .. $nparts-1]);
+        $newid = munge_id($id);
+        print "${before}set-session-id \"$newid-$rest\"$after\n";
+    }
+    #
+    # read-student-info call also has user name
+    #         $1                      $2     $3
+    elsif (/^(.*)read-student-info "([^"]*)"(.*)$/) {
+        my ($before, $after) = ($1, $3);
+        $id = $2;  # session label should start with student id
+        if ($id ne $session_id) {
+            print STDERR "warning: student id $id != session id part $session_id!\n";
+        }
+        $newid = munge_id($id);
+        print "${before}read-student-info \"$newid\"$after\n";
+    }
+    #
+    # kcd urls in hints can contain the user id. Here we just map
+    # it anywhere it is found, in any letter case. The id is quoted
+    # because it may contain regex metacharacters.
+    #
+    elsif ($id && /\Q$id\E/i) {
+        s/\Q$id\E/$newid/gi;
+        print;
+    }
+
+    # !!! Standalone Andes also records interactions with Login dialog
+    # should note this and suppress characters while in this box
+    else {
+        print;
+    }
+} # end while (<>)
+
+# at end of input, dump idmap so it can be saved
+foreach my $userid (sort keys %idmap) {
+    print STDERR "$userid\t$idmap{$userid}\n";
+}
+
+
+# munge_id -- return the replacement for a userid, recording the pair in
+# %idmap. A userid listed in %merge is treated as the id it is merged with.
+sub munge_id {
+    my ($userid) = @_;
+
+    # check if we have noted this as a duplicate account of some student
+    my $primary_id = $merge{$userid} ? $merge{$userid} : $userid;
+    my $replacement;
+
+    # check if it's a mid number which may occur with or without the initial "m" or "midn"
+    # prefix. If not, it may be a teacher or TA log
+    # Note: pattern is anchored at beginning but not end of id, so allows trailing cruft in
+    # id after proper mid number. This allows for extra letter as in m102340x which was used.
+    # But also matches longer digit string as in 26384826 used by instructor McClanahan.
+    # Maybe better to anchor at end and handle ids with trailing cruft via merge mechanism.
+    if ($primary_id =~ /^(?:m|mid|midn)?(\d{6})/i) {
+        # following in case we want to map on substructure: year parts will all
+        # be the same within a particular dataset.
+        # if ($primary_id =~ /^(m|mid|midn)?([\d][\d])([\d][\d][\d][\d])$/i)
+        #  $yr = $2;	# first two digits are two digit class year: 07, 08, 09 etc.
+        #  $snum = $3;	# remaining four digits are student number
+
+        # simple sample mapping function:
+        my $num = $1 + 8765;
+
+        # format the shifted number in hex
+        $replacement = sprintf("%X", $num);
+    }
+    else {
+        # !!! Put alternative method here for non-usna student names
+        if (! $warned{$userid}) {
+            print STDERR "Non mid id $userid found in $_\n";
+            $warned{$userid} = 1;
+        }
+        $replacement = $userid;
+    }
+
+    # remember mapping
+    $idmap{$userid} = $replacement;
+    return $replacement;
+}
diff --git a/LogProcessing/mkanon.pl b/LogProcessing/mkanon.pl
new file mode 100755
index 000000000..63af3ae2c
--- /dev/null
+++ b/LogProcessing/mkanon.pl
@@ -0,0 +1,160 @@
+#!/usr/bin/perl
+#
+# mkanon -- anonymize arbitrary userids in Andes log files,
+#            reading and updating a mapping file
+#
+# Usage:   mkanon.pl mapfile.txt < logs > newlogs
+#
+# reads one or more logs from standard input, writing anonymized logs
+# to standard output.
+#
+# Mapping file expected to be generated by mapid2.pl applied to list
+# of user names. Its lines may end in CRLF or in LF alone.
+#
+# This works on a concatenated sequence of logs.  If input files are
+# separate logs with user ids in their names, can write to a concatenated
+# output file, then split individual logs out from there with splitlogs.
+#
+
+use strict;
+use warnings;
+
+# Load mapping file
+my $filename = shift @ARGV;
+if (! $filename) { die "usage: mkanon.pl mapfile.txt < logs > newlogs\n"; }
+
+# set to 1 to dump the loaded mapping on stderr
+my $debug = 0;
+
+# load existing mapping from specified mapping file
+open my $mapfh, '<', $filename or die "Couldn't open $filename for reading: $!\n";
+# gulps in hash. The value excludes CR and LF, so the map loads whether or
+# not a DOS mode \r is read; requiring the \r left the map empty without it.
+my %idmap = map /(.*)\t([^\r\n]*)/, <$mapfh>;
+close $mapfh;
+if ($debug) {
+    my $maplength = keys %idmap;
+    print STDERR "Input mapping ($maplength) entries\n";
+    foreach my $k (keys %idmap) {
+        print STDERR "  |$k| |$idmap{$k}|\n";
+    }
+    print STDERR "End input mapping\n\n";
+}
+
+# ensure file includes prefix. Upper case ensures it can't conflict with
+# any canonicalized id.
+my $PREFIX_ID = "PREFIX";
+my $prefix = $idmap{$PREFIX_ID} or die "missing $PREFIX_ID entry in $filename\n";
+
+# set counter to one more than number of existing entries. Since
+# map contains a dummy prefix entry, this will be number of keys
+my $id_counter = keys %idmap;
+
+# set once a new mapping is generated, so that the map file gets rewritten
+my $modified_map = 0;
+
+# Optional file merge-ids.txt in the current directory can specify
+# mapping from actual userids to effective user ids for anonymization.
+# This can be used to map non-mid-number ids to mid numbers, or multiple
+# different ids to the same id, or numeric ids used by instructors to names.
+my %merge;
+if (open my $mergefh, '<', 'merge-ids.txt') {
+    %merge = map /(.*)\t([^\s]*)/, <$mergefh>;
+    close $mergefh;
+    #print STDERR "loaded merge map\n";
+}
+
+my $id;               # userid found in the log currently being copied
+my $anonid;           # anonymized replacement for $id
+my $session_id = '';  # userid part of the most recent session id
+
+while (<>) {
+    # cygwin perl on windows seems to read in the CR from a dos-mode (CRLF-terminated) text file.
+    # (Better would be to translate CRLF on read to a logical end of line represented as a single NL.)
+    # So strip any trailing CR, to ensure we have a line with a trailing logical NL marker only.
+    # Note "print" without arguments will print the whole line including the NL, which does seem to
+    # translate to a CRLF when done on Windows. When printing a constructed string, we have to include
+    # a NL, even if copying a suffix pattern match, since patterns normally don't match the NL.
+    s/\r$//;
+
+    # Log header part saying "by JoeSmith on Joes-Computer" can reveal identity.
+    # The user name is matched loosely, since a login name containing a space
+    # or a dot would otherwise be copied through unchanged.
+    #      $1                               $2
+    if (/^(.*)# Log of Andes session begun (.*?) by .* on .*$/) {
+        print "$1# Log of Andes session begun $2 by [user] on [computer]\n";
+    }
+    # Standalone Andes also records typing in the Login dialog box
+    # NOTE(review): the output drops the space in "C 1127" -- confirm intended
+    elsif (/^(.*)C 1127 (.*)$/) {
+        print "$1C1127 ?\n";
+    }
+    #
+    # initial set session id call normally contains userid as part
+    #         $1                   $2     $3
+    elsif (/^(.*)set-session-id "([^"]*)"(.*)$/) {
+        my ($before, $session, $after) = ($1, $2, $3);
+        # Session id form is UserID-MonthDay-Hours-Mins-Secs
+        # but any spaces in UserID are converted to hyphens
+        my @id_parts = split('-', $session);
+        my $nparts = @id_parts;
+        $id = $session_id = join(' ', @id_parts[0 .. $nparts-5]);
+        my $rest = join('-', @id_parts[$nparts-4 .. $nparts-1]);
+        $anonid = munge_id($id);
+        print "${before}set-session-id \"$anonid-$rest\"$after\n";
+    }
+    #
+    # read-student-info call also has user name
+    #         $1                      $2     $3
+    elsif (/^(.*)read-student-info "([^"]*)"(.*)$/) {
+        my ($before, $after) = ($1, $3);
+        $id = $2;  # session label should start with student id
+        if ($id ne $session_id) {
+            print STDERR "warning: student id $id != session id part $session_id!\n";
+        }
+        $anonid = munge_id($id);
+        print "${before}read-student-info \"$anonid\"$after\n";
+    }
+    #
+    # Open-Problem can contain user ids in the solution directory path
+    #
+
+    #
+    # !!! kcd urls in hints can contain the user id. It is replaced wherever
+    # it is found, in any letter case; the id is quoted because it may
+    # contain regex metacharacters.
+    #
+    elsif ($id && /\Q$id\E/i) {
+        s/\Q$id\E/$anonid/gi;
+        print;
+    }
+    else {
+        print;
+    }
+} # end while (<>)
+
+# at end of input, save the idmap if it has been extended
+if ($modified_map) {
+    open my $outfh, '>', $filename or die "Couldn't open $filename for updating: $!\n";
+    foreach my $k (sort keys %idmap) {
+        print {$outfh} "$k\t$idmap{$k}\r\n";
+    }
+    close $outfh or die "Couldn't finish writing $filename: $!\n";
+}
+
+
+# munge_id -- return the anonymized id for a userid, generating one and
+# adding it to %idmap if the userid has not been seen before.
+sub munge_id {
+    my ($userid) = @_;
+    # check if we have noted this as a duplicate account of some student,
+    # then canonicalize to lower case
+    my $canon_id = lc($merge{$userid} ? $merge{$userid} : $userid);
+    my $anon = $idmap{$canon_id};
+    if (! $anon) {
+        $anon = $idmap{$canon_id} = $prefix . $id_counter++;
+        print STDERR "$canon_id\t$anon\n";
+        $modified_map = 1;
+    }
+    return $anon;
+}
diff --git a/LogProcessing/mkunitmap.pl b/LogProcessing/mkunitmap.pl
new file mode 100755
index 000000000..920bb0e69
--- /dev/null
+++ b/LogProcessing/mkunitmap.pl
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+#
+# mkunitmap -- create unit map file from .aps files
+#
+# Usage:   mkunitmap.pl *.aps > unitmap.txt
+#
+# Reads the .aps files named on the command line and writes a
+# tab-delimited list of problem id, problem set pairs to stdout. The
+# problem set is the name of the file that lists the problem, without
+# its directory or extension.
+#
+# This can be used to generate the mapping file needed by log2xml
+use File::Basename;
+while (my $entry = <>) {
+    chomp $entry;
+    # skip the "ANDES Problem Set" line and any .wmv entries
+    next if $entry =~ /ANDES Problem Set/ or $entry =~ /\.wmv/;
+
+    # the problem id is the entry in upper case, with the DOS-mode CR removed
+    (my $problem = $entry) =~ tr/a-z/A-Z/;
+    $problem =~ s/\r//;
+    next if $problem eq "";
+
+    # fileparse splits off the directory and everything from the first dot
+    my ($setname) = fileparse($ARGV, qr/\..*/);
+    print "$problem\t$setname\n";
+}
diff --git a/LogProcessing/unescape.pl b/LogProcessing/unescape.pl
new file mode 100755
index 000000000..ac50c1564
--- /dev/null
+++ b/LogProcessing/unescape.pl
@@ -0,0 +1,13 @@
+# Copy stdin to stdout, changing CR-backslash-n sequences to CR-LF.
+# Needed to handle logs extracted by a certain database query, which
+# returns each entire log as a single line with its newlines escaped,
+# using a single newline char between logs.
+
+while (my $log = <>) {         # gulps a whole log as one line, LF at end
+    chomp($log);               # remove final NL separating logs
+    $log =~ s/\r\\n/\r\n/g;    # CR bslash n => CR LF     used at end of lines
+    $log =~ s/\\t/\t/g;        # bslash t => TAB          used after time stamps
+    # the following occurs in escape sequences inside some hints, e.g \l, \v, \n
+    $log =~ s/\\\\/\\/g;       # bslash bslash => bslash
+    print $log;
+}
diff --git a/LogProcessing/unmapid-usna.pl b/LogProcessing/unmapid-usna.pl
new file mode 100755
index 000000000..02310ba40
--- /dev/null
+++ b/LogProcessing/unmapid-usna.pl
@@ -0,0 +1,36 @@
+#!/usr/bin/perl
+#
+# unmapid -- map anonymized USNA mid ids in input file using
+#            external tab-delimited mapping file
+#
+# changes occurrences of an apparent anonymized mid id, once per line.
+# If multiple user ids map to same anonymized id, which one is chosen
+# is arbitrary
+
+use strict;
+use warnings;
+
+# load mapping from "idmap.txt" in current working directory
+open my $mapfh, '<', 'idmap.txt' or die "open: $!";
+# The value excludes CR and LF, so the file is read correctly whether its
+# lines end in CRLF or in LF alone.
+my %map = map /(.*)\t([^\r\n]*)/, <$mapfh>;
+close $mapfh;
+# invert it, so that it is keyed by anonymized id
+my %idmap = reverse %map;
+
+while (<>) {
+    s/\r$//;
+
+    # Match any five hex-digit string
+    if (/([0-9A-F]{5})/) {
+        my $anon = $1;
+        if (my $userid = $idmap{$anon}) {
+            # print "changing $anon to $userid\n";
+            s/\Q$anon\E/$userid/;
+        } else {
+            print STDERR "no log idmap entry for $anon\n";
+        }
+    }
+    print;
+}