diff --git a/LogProcessing/Info.sample/class-3326.xml b/LogProcessing/Info.sample/class-3326.xml new file mode 100755 index 000000000..67c219fc8 --- /dev/null +++ b/LogProcessing/Info.sample/class-3326.xml @@ -0,0 +1,7 @@ + + SP212 General Physics II + USNA + 3326 + Electricity and Magnetism Spring 2007 + Don Treacy + diff --git a/LogProcessing/Info.sample/class-5526.xml b/LogProcessing/Info.sample/class-5526.xml new file mode 100755 index 000000000..61e634bfa --- /dev/null +++ b/LogProcessing/Info.sample/class-5526.xml @@ -0,0 +1,7 @@ + + SP212 General Physics II + USNA + 5526 + Electricity and Magnetism Spring 2007 + Don Treacy + diff --git a/LogProcessing/Info.sample/class-6546.xml b/LogProcessing/Info.sample/class-6546.xml new file mode 100755 index 000000000..c965e7823 --- /dev/null +++ b/LogProcessing/Info.sample/class-6546.xml @@ -0,0 +1,7 @@ + + SP212 General Physics II + USNA + 6546 + Electricity and Magnetism Spring 2007 + Mary Wintersgill + diff --git a/LogProcessing/Info.sample/classmap.txt b/LogProcessing/Info.sample/classmap.txt new file mode 100755 index 000000000..110cf03de --- /dev/null +++ b/LogProcessing/Info.sample/classmap.txt @@ -0,0 +1,79 @@ +DE16B 3326 +DE14D 3326 +DE099 3326 +DB8B7 3326 +DDEEF 3326 +090966 3326 +DDC25 3326 +DD961 3326 +DD8DD 3326 +DD7F9 3326 +DD71E 3326 +DD517 3326 +DD457 3326 +DD2BF 3326 +DD2A7 3326 +DCF0B 3326 +live4nectar 3326 +DCE39 3326 +DCD49 3326 +DCD19 3326 +DCD07 3326 +m095652 3326 +DCC4D 3326 +DCC3B 3326 +DC96B 3326 +DE20A 5526 +DE18F 5526 +DDD63 5526 +DDD33 5526 +DD925 5526 +DD64F 5526 +DD60D 5526 +DD54D 5526 +DD45D 5526 +DD3D3 5526 +DD1AB 5526 +DD199 5526 +DD01F 5526 +DCF65 5526 +DCFA1 5526 +DCDA9 5526 +DCB21 5526 +DC9B9 5526 +DC995 5526 +DC8F3 5526 +DC8E7 5526 +DC791 5526 +DC6EF 5526 +DC6E9 5526 +DE24F 6546 +DE22B 6546 +DE147 6546 +DE05D 6546 +DDF07 6546 +DDF01 6546 +DDF01 6546 +DDDFF 6546 +DDD57 6546 +DDD21 6546 +DDC1F 6546 +DDA87 6546 +DD955 6546 +DD8E3 6546 +DD7E7 6546 +DD74B 6546 +DD48D 6546 +DD2DD 6546 
+DD193 6546 +DD06D 6546 +DD037 6546 +DCFDD 6546 +DCECF 6546 +DCECF 6546 +DCDCD 6546 +DCBF9 6546 +DCB03 6546 +DDACF 5526 +DD469 3326 + diff --git a/LogProcessing/Info.sample/condition-control.xml b/LogProcessing/Info.sample/condition-control.xml new file mode 100755 index 000000000..1866779a2 --- /dev/null +++ b/LogProcessing/Info.sample/condition-control.xml @@ -0,0 +1,4 @@ + + Katz Control + Control + diff --git a/LogProcessing/Info.sample/condition-experiment1.xml b/LogProcessing/Info.sample/condition-experiment1.xml new file mode 100755 index 000000000..f1410ce6c --- /dev/null +++ b/LogProcessing/Info.sample/condition-experiment1.xml @@ -0,0 +1,4 @@ + + Katz Short KCD + Experimental + diff --git a/LogProcessing/Info.sample/condition-experiment2.xml b/LogProcessing/Info.sample/condition-experiment2.xml new file mode 100755 index 000000000..412d02184 --- /dev/null +++ b/LogProcessing/Info.sample/condition-experiment2.xml @@ -0,0 +1,4 @@ + + Katz Long KCD + Experimental + diff --git a/LogProcessing/Info.sample/dataset.txt b/LogProcessing/Info.sample/dataset.txt new file mode 100755 index 000000000..694fa31c3 --- /dev/null +++ b/LogProcessing/Info.sample/dataset.txt @@ -0,0 +1 @@ +USNA Physics Spring 2007 diff --git a/LogProcessing/Info.sample/unitmap.txt b/LogProcessing/Info.sample/unitmap.txt new file mode 100755 index 000000000..9900e362b --- /dev/null +++ b/LogProcessing/Info.sample/unitmap.txt @@ -0,0 +1,573 @@ +MOMR1A Angular Momentum +MOMR1B Angular Momentum +MOMR2A Angular Momentum +MOMR2B Angular Momentum +MOMR3A Angular Momentum +MOMR4A Angular Momentum +EQCAP1A Capacitance +EQCAP1B Capacitance +EQCAP1C Capacitance +EQCAP1D Capacitance +EQCAP2A Capacitance +EQCAP2B Capacitance +EQCAP3A Capacitance +EQCAP3B Capacitance +EQCAP4A Capacitance +EQCAP4B Capacitance +EQCAP5A Capacitance +EQCAP6A Capacitance +CAP1A Capacitance +CAP1B Capacitance +CAP2A Capacitance +CAP2B Capacitance +CAP3A Capacitance +CAP4A Capacitance +CAP5A Capacitance +CAP6A Capacitance +CAP6B 
Capacitance +CAP9A Capacitance +CAP9B Capacitance +ROTS1A Circular Motion +ROTS1B Circular Motion +ROTS1C Circular Motion +ROTS2A Circular Motion +ROTS3A Circular Motion +ROTS4A Circular Motion +ROTS5A Circular Motion +ROTS6A Circular Motion +ROTS6B Circular Motion +ROTS6C Circular Motion +ROTS7A Circular Motion +ROTS8A Circular Motion +ROTS8B Circular Motion +KIR1A DC Circuits +KIR1B DC Circuits +KIR2A DC Circuits +KIR3A DC Circuits +KIR3B DC Circuits +KIR3C DC Circuits +KIR4A DC Circuits +KIR5A DC Circuits +KIR7A DC Circuits +EPOW1 DC Circuits +EPOW2 DC Circuits +EPOW3 DC Circuits +EPOW4 DC Circuits +RC1A DC Circuits +RC1B DC Circuits +RC1C DC Circuits +RC2A DC Circuits +RC3A DC Circuits +RC3B DC Circuits +RC4A DC Circuits +RC4B DC Circuits +RC5A DC Circuits +RC6A DC Circuits +RC7A DC Circuits +RC7B DC Circuits +RC8 DC Circuits +RC9 DC Circuits +CHARGE1A Electric Field +CHARGE1B Electric Field +CHARGE2 Electric Field +COUL1A Electric Field +COUL1B Electric Field +COUL1C Electric Field +COUL2A Electric Field +COUL2B Electric Field +COUL2C Electric Field +COUL3 Electric Field +EFIELD1A Electric Field +EFIELD1B Electric Field +EFIELD1C Electric Field +EFIELD1D Electric Field +EFIELD1E Electric Field +EFIELD2 Electric Field +EFIELD3 Electric Field +EFIELD4A Electric Field +EFIELD4B Electric Field +FOR1A Electric Field +FOR1B Electric Field +FOR1C Electric Field +FOR2A Electric Field +FOR2B Electric Field +FOR4A Electric Field +FOR4B Electric Field +FOR5 Electric Field +FOR7A Electric Field +FOR7B Electric Field +FOR8A Electric Field +FOR8B Electric Field +FOR9A Electric Field +FOR9B Electric Field +FOR10A Electric Field +FOR10B Electric Field +FOR11A Electric Field +FOR11B Electric Field +FOR11C Electric Field +ELEC1A Electric Field +ELEC1B Electric Field +ELEC2 Electric Field +ELEC3B Electric Field +ELEC4B Electric Field +ELEC5B Electric Field +ELEC6B Electric Field +GAUSS1 Electric Field +GAUSS3 Electric Field +GAUSS4 Electric Field +GAUSS5 Electric Field +GAUSS6 
Electric Field +GAUSS8 Electric Field +GAUSS9 Electric Field +GAUSS10 Electric Field +GAUSS11 Electric Field +DIP1A Electric Field +DIP1B Electric Field +EPOT1A Electric Potential +EPOT1B Electric Potential +EPOT1C Electric Potential +EPOT2 Electric Potential +POT1A Electric Potential +POT1B Electric Potential +POT2A Electric Potential +POT2B Electric Potential +POT2C Electric Potential +POT3A Electric Potential +POT3B Electric Potential +POT4 Electric Potential +POT5 Electric Potential +POT6 Electric Potential +POT7 Electric Potential +POT8 Electric Potential +FARA1A Electromagnetic Induction +FARA1B Electromagnetic Induction +FARA2A Electromagnetic Induction +FARA2B Electromagnetic Induction +FARA3A Electromagnetic Induction +FARA3B Electromagnetic Induction +FARA4A Electromagnetic Induction +FARA4B Electromagnetic Induction +FARA5A Electromagnetic Induction +FARA5B Electromagnetic Induction +FARA5C Electromagnetic Induction +FARA6A Electromagnetic Induction +FARA6B Electromagnetic Induction +FARA7A Electromagnetic Induction +FARA7B Electromagnetic Induction +FARA7C Electromagnetic Induction +FARA7D Electromagnetic Induction +FARA8A Electromagnetic Induction +FARA8B Electromagnetic Induction +FARA8C Electromagnetic Induction +FARA9 Electromagnetic Induction +FARA10A Electromagnetic Induction +FARA10B Electromagnetic Induction +FARA11A Electromagnetic Induction +FARA11B Electromagnetic Induction +AMP1 Electromagnetic Induction +EMWAVE1 Electromagnetic Waves +WAVE19 Electromagnetic Waves +EMWAVE3A Electromagnetic Waves +EMWAVE4 Electromagnetic Waves +EMWAVE5 Electromagnetic Waves +E1A Energy-Work +E1B Energy-Work +E1C Energy-Work +E2A Energy-Work +E2B Energy-Work +E2C Energy-Work +E3A Energy-Work +E4A Energy-Work +E4B Energy-Work +E4C Energy-Work +E5A Energy-Work +E5B Energy-Work +E6A Energy-Work +E7A Energy-Work +E7B Energy-Work +E8A Energy-Work +E8B Energy-Work +E9A Energy-Work +E9B Energy-Work +E10A Energy-Work +E11A Energy-Work +WE1A Energy-Work +WE2A 
Energy-Work +WE3A Energy-Work +WE4A Energy-Work +WE5 Energy-Work +WE6 Energy-Work +WE8 Energy-Work +WE9 Energy-Work +EGRAV1 Energy-Work +FLUIDS1 Fluids +FLUIDS2 Fluids +FLUIDS3 Fluids +FLUIDS4 Fluids +FLUIDS5 Fluids +FLUIDS6 Fluids +FLUIDS7 Fluids +FLUIDS8 Fluids +FLUIDS9 Fluids +FLUIDS11 Fluids +FLUIDS12 Fluids +FLUIDS13 Fluids +FLUIDS14 Fluids +FLUIDS15 Fluids +FBD1A Free Body Diagrams +FBD1B Free Body Diagrams +FBD2A Free Body Diagrams +FBD3A Free Body Diagrams +FBD4A Free Body Diagrams +FBD5A Free Body Diagrams +FBD6A Free Body Diagrams +FBD8 Free Body Diagrams +FBD9 Free Body Diagrams +IND1A Inductance +IND1B Inductance +IND1C Inductance +IND2A Inductance +IND3A Inductance +IND3B Inductance +IND3C Inductance +IND4 Inductance +LR1A Inductance +LR1B Inductance +LR1C Inductance +LR1D Inductance +LR2A Inductance +LR2B Inductance +LR3A Inductance +LR3B Inductance +LC1A Inductance +LC2A Inductance +LC2B Inductance +LRC1A Inductance +LRC2A Inductance +LMOM1A Linear Momentum +LMOM1B Linear Momentum +LMOM2A Linear Momentum +LMOM2B Linear Momentum +LMOM3A Linear Momentum +LMOM4A Linear Momentum +LMOM5 Linear Momentum +LMOM6 Linear Momentum +LMOM7 Linear Momentum +PGRAPH1 Linear Momentum +PGRAPH2 Linear Momentum +PGRAPH3 Linear Momentum +IMP1 Linear Momentum +IMP2 Linear Momentum +IMP3A Linear Momentum +IMP3B Linear Momentum +IMP3C Linear Momentum +CM1 Linear Momentum +CM2 Linear Momentum +CM3 Linear Momentum +ROC1 Linear Momentum +ROC2 Linear Momentum +ROC3 Linear Momentum +ROC4 Linear Momentum +ROC5 Linear Momentum +ROC6 Linear Momentum +MAG1A Magnetic Field +MAG1B Magnetic Field +MAG1C Magnetic Field +MAG2A Magnetic Field +MAG2B Magnetic Field +MAG3A Magnetic Field +MAG3B Magnetic Field +MAG4A Magnetic Field +MAG5A Magnetic Field +MAG5B Magnetic Field +MAGTOR1A Magnetic Field +MAGTOR1B Magnetic Field +MAGTOR1C Magnetic Field +MAGTOR1D Magnetic Field +MAGDIP1 Magnetic Field +MAGDIP2 Magnetic Field +MAGDIP3 Magnetic Field +MAGDIP4 Magnetic Field +MAG6A Magnetic Field 
+MAG6B Magnetic Field +MAG6C Magnetic Field +MAG7 Magnetic Field +MAG8A Magnetic Field +MAG8B Magnetic Field +MAG9 Magnetic Field +MAG10 Magnetic Field +MAG11 Magnetic Field +MAG12 Magnetic Field +MIRROR1 Optics +MIRROR2 Optics +MIRROR3 Optics +MIRROR4 Optics +LENS1A Optics +LENS1B Optics +LENS2A Optics +LENS2B Optics +LENS3A Optics +LENS3B Optics +LENS4A Optics +LENS4B Optics +LENS5A Optics +LENS5B Optics +REF1 Optics +REF2A Optics +REF2B Optics +REF2C Optics +REF3A Optics +REF3B Optics +REF4A Optics +REF4B Optics +REF5A Optics +REF5B Optics +REF6 Optics +INT1A Optics +INT1B Optics +INT1C Optics +INT1D Optics +INT2A Optics +INT2B Optics +OSC1 Oscillations +OSC2 Oscillations +OSC3 Oscillations +OSC4 Oscillations +OSC5 Oscillations +OSC6 Oscillations +OSC7 Oscillations +OSC8 Oscillations +POW1A Power +POW1B Power +POW2A Power +POW3A Power +POW4A Power +POW4B Power +POW5A Power +POW5B Power +POW5C Power +POW5D Power +POW6A Power +EQRES1A Resistance +EQRES1B Resistance +EQRES1C Resistance +EQRES1D Resistance +EQRES1E Resistance +EQRES2A Resistance +EQRES2B Resistance +EQRES3A Resistance +EQRES3B Resistance +EQRES4A Resistance +EQRES4B Resistance +EQRES5A Resistance +EQRES6A Resistance +EQRES7A Resistance +EQRES7B Resistance +EQRES8A Resistance +EQRES8B Resistance +DR1A Rotational Dynamics +DR2A Rotational Dynamics +DR2B Rotational Dynamics +DR3A Rotational Dynamics +DR4A Rotational Dynamics +DR5A Rotational Dynamics +DR6A Rotational Dynamics +DR6B Rotational Dynamics +DR7A Rotational Dynamics +DR8A Rotational Dynamics +EROT2 Rotational Dynamics +EROT3 Rotational Dynamics +EROT4 Rotational Dynamics +GRAV1 Rotational Dynamics +GRAV2 Rotational Dynamics +GRAV3 Rotational Dynamics +GRAV4 Rotational Dynamics +GRAV5 Rotational Dynamics +KR8 Rotational Kinematics +KR9 Rotational Kinematics +KR1A Rotational Kinematics +KR1B Rotational Kinematics +KR1C Rotational Kinematics +KR2A Rotational Kinematics +KR2B Rotational Kinematics +KR3A Rotational Kinematics +KR3B Rotational 
Kinematics +KR3C Rotational Kinematics +KR4A Rotational Kinematics +KR4B Rotational Kinematics +KR5A Rotational Kinematics +KR6A Rotational Kinematics +KR7A Rotational Kinematics +S1A Statics +S1B Statics +S1C Statics +S1D Statics +S1E Statics +S1F Statics +S2A Statics +S2B Statics +S2C Statics +S2D Statics +S2E Statics +S3A Statics +S3B Statics +S3C Statics +S4A Statics +S4B Statics +S5A Statics +S6A Statics +S7A Statics +S7B Statics +S8A Statics +S9A Statics +S10A Statics +S11A Statics +S11B Statics +S12A Statics +S13 Statics +S14 Statics +S15 Statics +S16 Statics +S17 Statics +DQ1 Translational Dynamics +DT1A Translational Dynamics +DT1B Translational Dynamics +DT1C Translational Dynamics +DT2A Translational Dynamics +DT3A Translational Dynamics +DT3B Translational Dynamics +DT3C Translational Dynamics +DT4A Translational Dynamics +DT4B Translational Dynamics +DT5A Translational Dynamics +DT6A Translational Dynamics +DT6B Translational Dynamics +DT6C Translational Dynamics +DT7A Translational Dynamics +DT7B Translational Dynamics +DT8A Translational Dynamics +DT9A Translational Dynamics +DT10A Translational Dynamics +DT11A Translational Dynamics +DT11B Translational Dynamics +DT12A Translational Dynamics +DT13A Translational Dynamics +DT13B Translational Dynamics +DT14A Translational Dynamics +DT14B Translational Dynamics +DT16 Translational Dynamics +DT17 Translational Dynamics +DT18 Translational Dynamics +DT19 Translational Dynamics +KT1A Translational Kinematics +KT1B Translational Kinematics +KT2A Translational Kinematics +KT2B Translational Kinematics +KT3A Translational Kinematics +KT3B Translational Kinematics +KT4A Translational Kinematics +KT4B Translational Kinematics +KT5A Translational Kinematics +KT6A Translational Kinematics +KT6B Translational Kinematics +KT7A Translational Kinematics +KT7B Translational Kinematics +KT8A Translational Kinematics +KT8B Translational Kinematics +KT9A Translational Kinematics +KT9B Translational Kinematics +KT10A 
Translational Kinematics +KT10C Translational Kinematics +KT11A Translational Kinematics +KT11B Translational Kinematics +KT12A Translational Kinematics +KT12B Translational Kinematics +KT12C Translational Kinematics +KT13A Translational Kinematics +KT13B Translational Kinematics +KT13C Translational Kinematics +KT14A Translational Kinematics +KT14B Translational Kinematics +KGRAPH1 Translational Kinematics +KGRAPH2 Translational Kinematics +KGRAPH3 Translational Kinematics +KGRAPH4 Translational Kinematics +KGRAPH5 Translational Kinematics +KGRAPH6 Translational Kinematics +KGRAPH7 Translational Kinematics +KGRAPH8 Translational Kinematics +KGRAPH9 Translational Kinematics +KGRAPH10 Translational Kinematics +KGRAPH11 Translational Kinematics +KGRAPH12 Translational Kinematics +KGRAPH13 Translational Kinematics +KGRAPH14 Translational Kinematics +KGRAPH16 Translational Kinematics +KGRAPH17 Translational Kinematics +KGRAPH18 Translational Kinematics +KGRAPH19 Translational Kinematics +KGRAPH20 Translational Kinematics +KGRAPH21 Translational Kinematics +VEC1A Vectors +VEC1B Vectors +VEC1C Vectors +VEC1D Vectors +VEC2A Vectors +VEC2B Vectors +VEC2C Vectors +VEC2D Vectors +VEC3A Vectors +VEC3B Vectors +VEC3C Vectors +VEC4A Vectors +VEC4B Vectors +VEC4C Vectors +VEC4D Vectors +VEC5A Vectors +VEC5B Vectors +VEC5C Vectors +VEC5D Vectors +VEC6A Vectors +VEC6B Vectors +VEC6C Vectors +VEC6D Vectors +VEC7A Vectors +VEC8A Vectors +VEC8B Vectors +VEC8C Vectors +VEC9 Vectors +RELVEL1A Vectors +RELVEL2A Vectors +RELVEL3A Vectors +MOT1 Vectors +MOT2 Vectors +MOT3 Vectors +MOT4 Vectors +WAVE1 Waves +WAVE2 Waves +WAVE3 Waves +WAVE4 Waves +WAVE5 Waves +WAVE6 Waves +WAVE8 Waves +WAVE9 Waves +WAVE10 Waves +WAVE11 Waves +WAVE12 Waves +WAVE13 Waves +WAVE14 Waves +WAVE15 Waves +WAVE16 Waves +WAVE17 Waves +WAVE18 Waves +WAVE24 Waves +FOR4A Electric Field +FOR4B Electric Field +ELEC1A Electric Field +FOR4C Electric Field +ELEC2 Electric Field +ELEC3A Electric Field +ELEC4A Electric Field 
+ELEC5A Electric Field +ELEC6A Electric Field +ELEC7A Electric Field \ No newline at end of file diff --git a/LogProcessing/OLI Log Processing.html b/LogProcessing/OLI Log Processing.html new file mode 100755 index 000000000..675bb6d2f --- /dev/null +++ b/LogProcessing/OLI Log Processing.html @@ -0,0 +1,357 @@ + + + + + ANDES Log Processing + + +

Processing ANDES Logs from OLI

+
+This document explains how to obtain Andes log sets from OLI, anonymize +them, and convert them into datashop format. All scripts for dealing +with log files can be found in the LogProcessing directory of the Andes +tree.
+
+

Obtaining Logs from OLI

+OLI logs can be retrieved from the oli QA server, +oli-qa.andrew.cmu.edu, using OLI's Data +Extraction tool. OLI will have to give you an account and +configure your access to the data extraction tool, after which a link +for it will show up as a link on the top right of your start page when +you log in to OLI.
+
+Logs on the QA server are mirrors of those on the OLI production +server. These mirrors are made periodically, typically when QA software +is updated and at other irregular intervals. So, they typically lag the +data on production by a few weeks. It is not allowed to retrieve logs +from the production server since the process can overload the server.
+
+For courses hosted on the PSLC server, the Data Extraction tool may be +made available on the live server at any time.
+
+

Running the Data Extraction tool

+After clicking the Data Extraction tool, you are walked through a +series of screens to define a query. You should make the following +selections:
+
+   Step 1: Select scope of data
+     Go down to "Course Sections" and select the +course sections you want. The available ones for you should be +highlighted. OLI may have to configure your access to particular course +logs. +Otherwise you will see them but not be able to select them in the Data +Extraction tool interface.
+    Do not select anything else on this page. You do not +normally want to select by content package, for example.
+    Click Go to Step 2
+
+    Step 2: Options
+          Columns: This selects which columns +from the OLI log database will be included in the output in a +comma-separated format. The essential one is the "info" column. The +info column records application-specific information. In our +case,  there a single row in the OLI log database corresponding to +the event of Andes uploading a log file at the end of an Andes problem +session. This row includes the entire text of an Andes session logfile +in the "info" field, so info should always remain checked.
+        One should also check the "Action +TimeStamp"  to receive the server's time at the time the log was +made. This is useful to have because the date and time shown inside the Andes logs is derived +from the user's system clock, which may be incorrectly set. So the OLI +recorded event time is more reliable to have. This time should be very +close to the time of the end of +the Andes session, when the log file was uploaded.
+         One may check the Action +Time Zone for a more complete specification about the time. +However,  since this event is recorded by the OLI server, this +does not really add information -- the times here should always be in +the Eastern time zone where the OLI servers are, even if the students +are working in different time zones.
+          All other columns should +be unchecked to keep the output simple.
+
+          Notes: In theory +there might be some use for the User Id column, which will show an +anonymized OLI user id (a long ugly GUID), or the Session GUID, +which identifies an OLI login session. This detail could be used to +correlate Andes logs with other OLI, non-ANDES log entries made by the +same student or within the same OLI login session. For example, one can +also use the Data Extraction tool to retrieve logs showing access to +learning pages in the course. With that result, one could determine +which learning pages the student visited in the same session as the +Andes log was made.  However, we have never made any use of this +information up to now.
+   
+    The question has sometimes come up as to whether we +can determine when students view training videos on OLI from their +logs. Right now, there is no record made in the OLI log database of +these events, because they are just serves of web content that do +not pass through the OLI courseware system. One way around this would be +to wrap each video in its own learning page. In this case there would be a log of those learning +page accesses. However, that would not show whether the student had +launched the video or not, merely whether they had opened the page on +which it resides. Another possible way might be to convert the videos +to Flash format, and make use of Flash media logging support built into +certain OLI tools. This would log media viewing events in the +DataShop's xml notation, and these events could then be retrieved.
+
+          Actions: Select Andes. This is a +custom action which indicates uploading an Andes log at the end of an +Andes problem session.
+
+          Filters: leave blank
+
+          Options:  Normally one should +select a date range which includes all logs in the course but exclude +other semesters that might contain the same student. The dates do +not  have to be exact . For example, for a Spring term 2007 +course, it could suffice to select a range from 2007-01-01 to +2007-06-01, even if the exact course date range was somewhat narrower.
+
+          The reason for this is +that OLI does not actually record course information in the log +database. Rather, when you request logs for a course, the Data +Extraction server looks up the course roster and translates this into a +query for logs by all students on that roster. If a student was in both +a fall term and spring term course, this will retrieve logs from +outside of the course. 
+
+          All other options should +be left as defaults.
+
+At Step 3, the query is submitted and you should start downloading a +zip file named data.zip. However, this step often times out or fails if +the server is heavily +loaded. In that case you just have to try again at a different time.
+
+    The data.zip itself contains a single file named +actions.csv, with all the data. Save this zip file with some +identifying name like Fall2007-USNA.dat.z
+

Concatenated log format

+    The embedded file you get is formally a csv file +showing selected rows in a database, like a +spreadsheet. The first line gives the column headings and subsequent +lines give comma-separated values. As noted above, the +last "column" of each line is the full Andes log, which normally +includes multiple text lines within it.  Effectively that makes +this file a concatenated sequence of Andes logs, with a +little bit of cruft before the initial log header line.
+
+    For most purposes the concatenated file can be +processed as a unit. By using zcat and piping output to further +processing, it may not even be necessary to uncompress it. However, it +could also be split into separate log files for processing +-- see the splitlogs.pl tool in the Andes LogProcessing directory.
+
+    Within the concatenated log, individual session logs +can be found by searching for the Andes log header line, which will +have the following format:
+
+2008/04/03 18:15:21,Eastern,# Log of Andes session begun Thursday, +April 03, 2008 18:14:50 by [user] on [computer]
+
+    Individual logs should each end with an END-LOG line
+1:00    END-LOG
+    However, in theory an error could prevent the +END-LOG statement from getting into the log, so it is safest just to +end a log at the header of the next one.
+
+     Technically this file is in Unix text file +format, in which there is a single newline character at the end of +each row from the database. However, the last column of all the rows +after the first, header line, ends with the full text of an Andes log +file, and an Andes log file is itself a multi-line Windows-format +text file in which the lines end with carriage-return linefeed pairs. +One effect of this is that when viewed in a text editor, there will +appear to be a blank line after each log file.
+
+  
+

Anonymizing the logs

+

+
+There are different scripts for anonymizing USNA logs, where our +students are supposed to use their alphas, aka mid numbers, as user +ids, and anonymizing arbitrary student logs, where the ids may take any +form.
+
Anonymizing USNA midshipmen logs: mkanon-usna.pl
+
+To anonymize mid logs, obtain the file mkanon-usna.pl.template. Edit it +to include a different hash function in the munge_id routine to map the +mid number into some anonymization code. Rename it to mkanon-usna.pl. +The basic way to anonymize is then:
+          zcat Fall2007-USNA.dat.z | +perl mkanon-usna.pl  > Fall2007-USNA-anon.dat 2> idmap.txt
+
+The standard output will be the anonymized concatenated log +(uncompressed). The stderr output will contain any error messages, of +which the most important are reports of ids that do not conform to the +mid number pattern. This will be followed by a tab-delimited listing of +the id mapping file generated. This mapping should be saved for reference +and for use in applying the mapping to other files. So you should +inspect idmap first, and edit out any error messages before saving it. +[Maybe better to just write idmap.txt file separately from error +messages? ]
+
+A nuisance is that sometimes the same real student will have created +two or more different OLI ids. Also, a student may not have used a mid +number at all as an id. Normally we ask instructors to grant us TA +access to courses we +support so we can inspect the gradebook and rosters, so the usual way +to identify such students is to inspect the OLI section rosters, which +will show students with the same real name on different lines.
+
+Both these issues can be handled by creating a file named merge-ids.txt +in the directory in which the script is run. This should be +tab-separated two column listing mapping login ids to canonical  +user ids: instances of the first id will be mapped to the second before +anonymizing. The second id need never have been actually used, e.g. for +someone who used a non-mid number id, you could just map it to a mid +number.
+
+Note the anonymization will already merge students who used two user +ids consisting of a mid number with and without an initial "m" since it +anonymizes based on the mid number itself. So no special entry is +required for these.
+
Applying the id map to other files: mapid-usna.pl
+
+The mapping defined in the saved file idmap.txt can be applied to other +text files, e.g. a list of questionnaire respondents or students in some +experimental condition, by using the script mapid-usna.pl.
+       mapid-usna.pl < infile > outfile
+This script looks for a file named idmap.txt in the current directory. +This also should be customized from the template file to include the +same mapping function used in mkanon-usna. This will apply the map to +mid numbers in the input file. If an id is not found in the map, it will use +the hashing algorithm to generate one, generating an error message. The +advantage of re-using the log-generated mapping file is that it +tells us when it encounters a user id for which no logs were found in +the set. This may indicate an anomaly requiring investigation, e.g. a +questionnaire respondent who entered his user id incorrectly.
+
Anonymizing arbitrary logs
+Student ids in non-USNA datasets can be anonymized with the mkanon.pl +and mapid.pl scripts. These don't use a hashing algorithm. Rather, they +simply generate ids by adding numbers to a specified prefix. To use +these, create a  tab-separated mapping file (of any name) +initialized to contain a special mapping for PREFIX, e.g
+          PREFIX\tWH081
+Then do
+            perl mkanon.pl +mapfile.txt < log > newlog
+where mapfile.txt is the mapping file. This will update mapfile.txt +with the new mapping file.
+
+This can be done multiple times; in each case, an existing mapping will +be used if found, and the mapping file will be updated with any new +entries at the end.
+
+As above, the mapping can be applied to arbitrary text by
+       perl mapid.pl mapfile.txt < old > +new
+
+

Converting Raw Logs to DataShop Format

+The script log2xml.pl can be used to convert an anonymized dataset into +files in the xml format the DataShop uses for import into their +database. By the datashop's request, this will generate one folder per +student, and one xml log file per session log. At the end of the +conversion, this should be zipped up and delivered to the DataShop for +dropoff. The DataShop should give you ssh access to a dropbox location +on their "cooker" server for this purpose.
+
Info files
+Although the basic conversion is simple, the converter makes use of +several external files to patch in information not included in the +Andes raw logs. It will look for them in a subdirectory named "Info" +within its working directory when run.
+
+Required:
+   dataset.txt -- contains dataset name
+Optional:
+   classmap.txt -- student to class id mapping.
+   class-XXX.xml -- class XML element for class w/id XXX
+   conditionmap.txt -- student to condition id mapping
+   condition-XXX.xml -- condition element for id XXX
+   unitmap.txt  -- problem to unit mapping
+
+The dataset.txt file must exist. All others are optional; if not found, +this info will not be included in the conversion.
+
+Samples of these files can be viewed in the directory Info.sample in +the LogProcessing directory.
+
Class information
+The classmap file is a two-column tab-separated mapping of student +names to some string identifying the class they were in. This could be +a section number like "3346" or it could just be an instructor name +like "gershmann". The class-XXX.xml then gives the xml element to patch +into the conversion for specifying the student's class.  For +example:
+
+<class>
+           +<name>SP212 General Physics II</name>
+           +<school>USNA</school>
+           +<period>6546</period>
+       <description>Electricity and +Magnetism Spring 2007</description>
+           +<instructor>Mary Wintersgill</instructor>
+</class>
+
+If all this information is not known, then it need not be included.
+
+This information must be obtained from OLI rosters or other sources +from the instructor or experimenter. One way is to build a spreadsheet +or list, cut the column or real names and save to a file, anonymize +that file, and paste it back into the spreadsheet, then save in +tab-delimited format.
+
Condition information
+Experimental condition information may come from two sources: it may be +included in the logs, if OLI had been customized for different +conditions, as in Sandy Katz's USNA experiments. In this case the +condition will always be one of "control", "experiment", "experiment1" +or "experiment2", which are the only possible values defined in our OLI +course. In this case no conditionmap file is needed.
+
+Alternatively, the condition information may be patched in via a +conditionmap.txt file, which maps student ids to experiment condition +ids. This is more common for logs from lab studies.
+
+Again, the ids used here are arbitrary. The xml element +condition-XXX.xml will be patched into the converted log to explain the +student's experiment condition. A sample is
+
+    <condition>
+        <name>Katz Reflective +Dialogue</name>
+             +<type>Experimental</type>
+    </condition>
+
+Note a conditionmap will take priority over conditions found in the +logs. It is not required to include any condition information.
+
+I believe the datashop format does not support multiple conditions in a +log, even though a USNA student might have participated in multiple +experiments, say a lab study and also a longer study like Sandy Katz's. +
+
Unit information
+The converter also makes use of a mapping from problem id to problem +set, e.g kt1 -> Translational Kinematics, since this information is +not in the logs. This is contained in the file unitmap.txt. This file +can be generated from a set of aps files by the script  +mkunitmap.pl as
+       mkunitmap.pl *.aps > unitmap.txt
+
+ It changes rarely so it can usually just be copied.
+
+
+
+ + diff --git a/LogProcessing/log2xml.pl b/LogProcessing/log2xml.pl index 633e2cde6..374eb069a 100755 --- a/LogProcessing/log2xml.pl +++ b/LogProcessing/log2xml.pl @@ -1,3 +1,4 @@ +#!/usr/bin/perl ##################################################################### # log2xml -- convert ANDES log files into PSLC Datashop XML format # @@ -10,7 +11,8 @@ # output file is found in: # student-id/session-id.xml # -# Uses the following files from its working directory: +# Looks for the following supporting files in the Info subdirectory of +# its current working directory: # Required: # dataset.txt -- contains dataset name # Optional: @@ -27,10 +29,14 @@ # provide only the single file class.xml # # Condition ids may come from set-condition statement in log -# or from condition-map. There may be no conditions to set. +# or from condition-map, with condition-map taking precedence. +# There may be no conditions to set. +# +# The unitmap file needs to change only if new problems are added +# It can be generated by the mkunitmap.pl script. # ###################################################################### -my $revision_string = '$Revision: 1.10 $'; +my $revision_string = '$Revision: 1.11 $'; # globals for current log line my ($timestamp, $event, $argstr); @@ -114,7 +120,7 @@ # load the problem to unit mapping table, if it exists if (open UNITMAP, "; + %unitmap = map /(.*)\t([^\r]*)/, ; close UNITMAP; $have_unitmap = 1; #print STDERR "loaded unit map\n"; @@ -124,7 +130,7 @@ # we may get a condition from a conditionmap file, or else from set-condition # above. If both are set, condition map will override. 
if (open CONDITIONMAP, "; + %conditionmap = map /(.*)\t([^\r]*)/, ; close CONDITIONS; $have_conditionmap = 1; #print STDERR "loaded condition map\n"; @@ -132,7 +138,7 @@ # load the student to class mapping table, if it exists if (open CLASSMAP, "; + %classmap = map /(.*)\t([^\r]*)/, ; close CLASSMAP; $have_classmap = 1; } @@ -142,7 +148,7 @@ #-------------------------------------------------------------------------------- while (<>) { chomp($_); - s/\r$//; # delete dangling CR's remaining from Unix to DOS conversion + s/\r$//; # cygwin PERL includes DOS-mode CR's by default, see PERLIO # Log header line begins a new log. We could reset and get date from this, but # in fact we get it from initial set-session-id call below. However, need to @@ -282,6 +288,11 @@ $unit_level_begin = "$unit"; $unit_level_end = ""; } else { $unit_level_begin = $unit_level_end = "" }; + # AW: group level is only defined to make it easy to select, say, + # all problems named CM*, in the datashop by selecting that group. + # But datashop might let you do this anyway, even without the group level, + # just by using a pattern match on the problem name. If so, then there + # is not much point in including this. # try to include a group level using name prefix, if we find one # e.g. cm1a => CM*, roc2a => ROC*, etc. $group = $problem; # default if name doesn't include a number diff --git a/LogProcessing/mapid-usna.pl.template b/LogProcessing/mapid-usna.pl.template new file mode 100755 index 000000000..c39174de3 --- /dev/null +++ b/LogProcessing/mapid-usna.pl.template @@ -0,0 +1,45 @@ +#!/usr/bin/perl +# +# mapid -- map USNA mid ids in input file using external tab-delimited mapping +# file +# +# changes occurrences of an apparent mid id , once per line. +# An apparent mid id is any six digit number optionally preceded by an "m" or "M". 
+ +# load mapping from "idmap.txt" in current working directory +open MAPFILE, "; +close MAPFILE; + +while (<>) +{ + s/\r$//; # cygwin perl includes CR from DOS-mode text files by default + + # Match any six digit number, optionally preceded by an "m". + # Note numeric ids coming out of excel may lose initial + # zero -- must fix this if this script is to work. + # !!! also matches digit string with more than 6 digits -- fix? + if (/[Mm]?([\d][\d][\d][\d][\d][\d])/) { + $alpha = $1; + if (($newid = $idmap{$alpha}) or + ($newid = $idmap{"m" . $alpha}) or + ($newid = $idmap{"M" . $alpha}) ){ + # print "changing $& to $newid\n"; + s/$&/$newid/; + } else { + &munge_id(); + s/$&/$newid/; + print STDERR "no log idmap entry for $& generated $newid\n"; + } + } + print; +} + +sub munge_id () # reads global $alpha, sets global $newid +{ + $num = $alpha; + # simple sample mapping function + $num += 8765; + # treat new num string as integer and format in hex + $newid = sprintf("%X", ($num + 0)); +} diff --git a/LogProcessing/mapid.pl b/LogProcessing/mapid.pl new file mode 100755 index 000000000..dc27a785f --- /dev/null +++ b/LogProcessing/mapid.pl @@ -0,0 +1,73 @@ +#!/usr/bin/perl +# +# mapid -- map list of student ids in input file using external +# tab-delimited mapping file, generating new id if not found +# and updating +# +# Usage: mapid.pl mapfile.txt [idfile.txt] +# Reads id list from second argument, standard input if none. Writes +# anonymized list to stdout, updating mapfile with newly generated +# mappings if any. Also writes newly generated pairs to stderr. +# +# Mapfile is tab-delimited list of canonical-name anon-id pairs. It +# should contain a special entry for "PREFIX" specifying the prefix +# to use when generating ids. +# + +my $filename = $ARGV[0]; shift @ARGV; +if (! 
$filename) { die "usage: mapid.pl mapfile.txt [inputfile]\n"; } + +# count of ids generated with this prefix +my $id_counter = 1; + +# load existing mapping from specified mapping file +open MAPFILE, "<$filename" or die "Couldn't open $filename for reading: $!\n" ; +%idmap = map /(.*)\t([^\r]*)/, ; # gulp in hash; Allow DOS mode \r at eol +close MAPFILE; +if (1) { # debugging printout + $maplength = (keys idmap); + print STDERR "Input mapping ($maplength) entries\n"; + while( ($k, $v) = each %idmap) { + print STDERR " |$k| |$v|\n"; + } + print STDERR "End input mapping\n\n"; +} +# ensure file includes prefix. Upper case ensures it can't conflict with +# any canonicalized id. +my $PREFIX_ID = "PREFIX"; +($prefix = $idmap{$PREFIX_ID}) or die "missing $PREFIX_ID entry in $filename\n"; + +# set counter to one more than number of existing entries. Since +# map contains a dummy prefix entry, this will be number of keys +$id_counter = (keys idmap); +#print STDERR "loaded id map, counter= $id_counter\n"; + +while (<>) +{ + s/\r$//; # cygwin perl may include CR from DOS-mode text files + # ids may have spaces, dots, or odd chars. Assume begin and + # end with a word character. + if (/[\w].*[\w]/) { + # to ignore case differences, use canonical lower case + # form in mapping table. + $canon_id = $id = $&; + $canon_id =~ tr/[A-Z]/[a-z]/; + if (! ($anonid = $idmap{$canon_id})) { + $anonid = $idmap{$canon_id} = $prefix . $id_counter++; + print STDERR "$canon_id\t$anonid\n"; + $modified_map = 1; + } + # NB: substitute for original, not canonical, form + s/$id/$anonid/; + } + print; +} + +# update mapping file if mapping has been extended. 
+if ($modified_map) { + open MAPFILE, ">$filename" or die "Couldn't open $filename for updating: $!\n"; + while( ($k, $v) = each %idmap) { + print MAPFILE "$k\t$v\r\n"; + } + close MAPFILE; +} diff --git a/LogProcessing/mkanon-usna.pl.template b/LogProcessing/mkanon-usna.pl.template new file mode 100755 index 000000000..efc8726f0 --- /dev/null +++ b/LogProcessing/mkanon-usna.pl.template @@ -0,0 +1,123 @@ +#!/usr/bin/perl +# +# mkanon -- anonymize USNA userids in Andes log files +# +# Usage: reads one or more logs from standard input, writes to standard +# output +# +# If input files have ids in their names, can write to a concatenated +# output file, then split out from there with splitlogs. +# +# Optional file merge-ids.txt in the current directory can specify +# mapping from actual userids to effective user ids for anonymization. +# This can be used to map non-mid-number ids to mid numbers, or multiple +# different ids to the same id, or numeric ids used by instructors to names. + +# old custom merge mapping +#%merge = ( "gossard419" => "m092508", +# "blindmelon" => "m094098"); + +if (open MERGEFILE, "; + close MERGEFILE; + #print STDERR "loaded merge map\n"; +} + +while (<>) +{ + s/\r$//; # cygwin perl includes CR from DOS-mode text files by default + + # Log header part saying "by JoeSmith on Joes-Computer" can reveal identity + # $1 $2 + if (/^(.*)# Log of Andes session begun (.*) by [\w-]+ on .*$/) + { + print "$1# Log of Andes session begun $2 by [user] on [computer]\n"; + } + # + # initial set session id call normally contains userid as part + # $1 $2 $3 + elsif (/^(.*)set-session-id "([^"]*)"(.*)$/) + { + # Session id form is UserID-MonthDay-Hours-Mins-Secs + # but any spaces in UserID are converted to hyphens + # ($id, $rest) = split(/-/, $2, 2); + @id_parts = split('-', $2); + $nparts = @id_parts; + $id = $session_id = join(' ', @id_parts[0 .. $nparts-5]); + $rest = join('-', @id_parts[$nparts-4 .. 
$nparts-1]); + &munge_id(); + print "$1set-session-id \"$newid-$rest\"$3\n"; + } + # + # read-student-info call also has user name + # $1 $2 $3 + elsif (/^(.*)read-student-info "([^"]*)"(.*)$/) + { + $id = $2; # session label should start with student id + if ($id ne $session_id) { + print STDERR "warning: student id $id != session id part $session_id!\n"; + } + &munge_id (); + print "$1read-student-info \"$newid\"$3\n"; + } + # + # kcd urls in hints can contain the user id. Here we just map + # it anywhere it is found + # + elsif ($id && /$id/i) + { + s/$id/$anonid/g; + print; + } + + # !!! Standalone Andes also records interactions with Login dialog + # should note this and suppress characters while in this box + else { + print; + } +} # end while (<>) + +# at end of input, dump idmap so it can be saved +foreach $id (sort keys %idmap) { + print STDERR "$id\t$idmap{$id}\n"; +} + + +sub munge_id () # reads global $id, sets global $newid +{ + # check if we have noted this as a duplicate account of some student + $primary_id = $merge{$id} ? $merge{$id} : $id; + + # check if it's a mid number which may occur with or without the initial "m" or "midn" + # prefix. If not, it may be a teacher or TA log + # Note: pattern is anchored at beginning but not end of id, so allows trailing cruft in + # id after proper mid number. This allows for extra letter as in m102340x which was used. + # But also matches longer digit string as in 26384826 used by instructor McClanahan. + # Maybe better to anchor at end and handle ids with trailing cruft via merge mechanism. + if ($primary_id =~ /^(m|mid|midn)?([\d][\d][\d][\d][\d][\d])/i) + { + # following in case we want to map on substructure: year parts will all + # be the same within a particular dataset. + # if ($primary_id =~ /^(m|mid|midn)?([\d][\d])([\d][\d][\d][\d])$/i) + # $yr = $2; # first two digits are two digit class year: 07, 08, 09 etc. 
+ # $snum = $3; # remaining four digits are student number + $num= $2; + + # simple sample mapping function: + $num += 8765; + + # treat new num string as integer and format in hex + $newid = sprintf("%X", ($num + 0)); + # remember mapping + $idmap{$id} = $newid; + } + else { + # !!! Put alternative method here for non-usna student names + if (! $warned{$id} ) { + print STDERR "Non mid id $id found in $_\n"; + $warned{$id} = 1; + } + $newid = $id; + $idmap{$id} = $newid; + } +} diff --git a/LogProcessing/mkanon.pl b/LogProcessing/mkanon.pl new file mode 100755 index 000000000..63af3ae2c --- /dev/null +++ b/LogProcessing/mkanon.pl @@ -0,0 +1,144 @@ +#!/usr/bin/perl +# +# mkanon -- anonymize arbitrary userids in Andes log files, +# reading and updating a mapping file +# +# Usage: mkanon.pl mapfile.txt < logs > newlogs +# +# reads one or more logs from standard input, writing anonymized logs +# to standard output. +# +# Mapping file expected to be generated by mapid2.pl applied to list +# of user names. +# +# This works on a concatenated sequence of logs. If input files are +# separate logs with user ids in their names, can write to a concatenated +# output file, then split individual logs out from there with splitlogs. +# + +# Load mapping file +my $filename = $ARGV[0]; shift @ARGV; +if (! $filename) { die "usage: mapid.pl mapfile.txt [inputfile]\n"; } + +# count of ids generated with this prefix +my $id_counter = 1; + +# load existing mapping from specified mapping file +open MAPFILE, "<$filename" or die "Couldn't open $filename for reading: $!\n" ; +%idmap = map /(.*)\t(.*)\r/, ; # gulps in hash. Assumes DOS mode \r is read +close MAPFILE; +if (0) { # debugging printout + $maplength = (keys idmap); + print STDERR "Input mapping ($maplength) entries\n"; + while( ($k, $v) = each %idmap) { + print STDERR " |$k| |$v|\n"; + } + print STDERR "End input mapping\n\n"; +} +# ensure file includes prefix. Upper case ensures it can't conflict with +# any canonicalized id. 
+my $PREFIX_ID = "PREFIX"; +($prefix = $idmap{$PREFIX_ID}) or die "missing $PREFIX_ID entry in $filename\n"; + +# set counter to one more than number of existing entries. Since +# map contains a dummy prefix entry, this will be number of keys +$id_counter = (keys idmap); +#print STDERR "loaded id map, counter= $id_counter\n"; + +# Optional file merge-ids.txt in the current directory can specify +# mapping from actual userids to effective user ids for anonymization. +# This can be used to map non-mid-number ids to mid numbers, or multiple +# different ids to the same id, or numeric ids used by instructors to names. +my %merge; +if (open MERGEFILE, "; + close MERGEFILE; + #print STDERR "loaded merge map\n"; +} + + +while (<>) +{ + # cygwin perl on windows seems to read in the CR from a dos-mode (CRLF-terminated) text file. + # [Better would be to translate CRLF on read to a logical end of line represented as a single NL.) + # So strip any trailing CR, to ensure we have a line a string with a trailing logical NL marker only. + # Note "print" without arguments will print the whole line including the NL, which does seem to + # translate to a CRLF when done on Windows. When printing a constructed string, we have to include + # a NL, even if copying a suffix pattern match, since patterns normally don't match the NL. 
+ s/\r$//; + + # Log header part saying "by JoeSmith on Joes-Computer" can reveal identity + # $1 $2 + if (/^(.*)# Log of Andes session begun (.*) by [\w-]+ on .*$/) + { + print "$1# Log of Andes session begun $2 by [user] on [computer]\n"; + } + # Standalone Andes also records typing in the Login dialog box + elsif (/^(.*)C 1127 (.*)$/) { + print "$1C1127 ?\n"; + } + # + # initial set session id call normally contains userid as part + # $1 $2 $3 + elsif (/^(.*)set-session-id "([^"]*)"(.*)$/) + { + # Session id form is UserID-MonthDay-Hours-Mins-Secs + # but any spaces in UserID are converted to hyphens + @id_parts = split('-', $2); + $nparts = @id_parts; + $id = $session_id = join(' ', @id_parts[0 .. $nparts-5]); + $rest = join('-', @id_parts[$nparts-4 .. $nparts-1]); + &munge_id(); + print "$1set-session-id \"$anonid-$rest\"$3\n"; + } + # + # read-student-info call also has user name + # $1 $2 $3 + elsif (/^(.*)read-student-info "([^"]*)"(.*)$/) + { + $id = $2; # session label should start with student id + if ($id ne $session_id) { + print STDERR "warning: student id $id != session id part $session_id!\n"; + } + &munge_id (); + print "$1read-student-info \"$anonid\"$3\n"; + } + # + # Open-Problem can contain user ids in the solution directory path + # + + # + # !!! kcd urls in hints can contain the user id + # + elsif ($id && /$id/i) + { + s/$id/$anonid/g; + print; + } + else { + print; + } +} # end while (<>) + +# at end of input, dump idmap so it can be saved +if ($modified_map) { + open MAPFILE, ">$filename" or die "Couldn't open $filename for updating: $!\n"; + while( ($k, $v) = each %idmap) { + print MAPFILE "$k\t$v\r\n"; + } + close MAPFILE; +} + + +sub munge_id () # reads global $id, sets global $anonid +{ + # check if we have noted this as a duplicate account of some student + $canon_id = $merge{$id} ? $merge{$id} : $id; + # canonicalize to lower case + $canon_id =~ tr/[A-Z]/[a-z]/; + if (! 
($anonid = $idmap{$canon_id})) { + $anonid = $idmap{$canon_id} = $prefix . $id_counter++; + print STDERR "$canon_id\t$anonid\n"; + $modified_map = 1; + } +} diff --git a/LogProcessing/mkunitmap.pl b/LogProcessing/mkunitmap.pl new file mode 100755 index 000000000..920bb0e69 --- /dev/null +++ b/LogProcessing/mkunitmap.pl @@ -0,0 +1,21 @@ +#!/usr/bin/perl +# +# mkunitmap -- create unit map file from .aps files +# +# Usage: reads one or more aps files from standard input, writes +# tab-delimited list of problem id, problem set pairs to stdout. +# +# This can be used to generate the mapping file needed by log2xml +use File::Basename; +while (<>) +{ + chomp; + next if (/ANDES Problem Set/); + next if (/\.wmv/); + $problem = $_; + $problem =~ tr/a-z/A-Z/; + $problem =~ s/\r//; + next if $problem eq ""; + ($setname,$dir,$ext) = fileparse($ARGV, qr/\..*/); + print "$problem\t$setname\n"; +} diff --git a/LogProcessing/unescape.pl b/LogProcessing/unescape.pl new file mode 100755 index 000000000..ac50c1564 --- /dev/null +++ b/LogProcessing/unescape.pl @@ -0,0 +1,13 @@ +# copy stdin to stdout, changing CR-slash-n sequences to CR-LF +# Needed to handle logs extracted by certain database query which +# returns entire logs as single lines, with newlines escaped, using +# a single newline char between logs: + +while (<>) { # gulps a whole log as a single line with LF at end + chomp($_); # remove final NL separating logs + s/\r\\n/\r\n/g; # CR bslash n => CR LF used at end of lines + s/\\t/\t/g; # bslash t => TAB used after time stamps + # following occurs in escape sequence inside some hints, e.g \l, \v, \n + s/\\\\/\\/g; # bslash bslash => bslash + print "$_"; +} diff --git a/LogProcessing/unmapid-usna.pl b/LogProcessing/unmapid-usna.pl new file mode 100755 index 000000000..02310ba40 --- /dev/null +++ b/LogProcessing/unmapid-usna.pl @@ -0,0 +1,31 @@ +#!/usr/bin/perl +# +# unmapid -- map anonymized USNA mid ids in input file using +# external tab-delimited mapping file +# +# 
changes occurrences of an apparent anonymized mid id, once per line. +# If multiple user ids map to same anonymized id, which one is chosen +# is arbitrary + +# load mapping from "idmap.txt" in current working directory +open MAPFILE, "; +close MAPFILE; +%idmap = reverse %map; + +while (<>) +{ + s/\r$//; + + # Match any five hex-digit string + if (/([0-9A-F][0-9A-F][0-9A-F][0-9A-F][0-9A-F])/) { + if ($newid = $idmap{$1}) { + # print "changing $& to $newid\n"; + s/$&/$newid/; + } else { + print STDERR "no log idmap entry for $&\n"; + } + } + print; +} +