## Import libraries

In [1]:
from zipfile import ZipFile
from bs4 import BeautifulSoup
import unicodedata
import re
import pandas as pd
import numpy as np

## Open Form B and read docx

In [2]:
# A .docx file is a zip archive; pull the main document part out of it.
# The handle is deliberately not called `zip`, so the built-in zip() is not
# shadowed for the rest of the kernel session.
with ZipFile('FORM B.docx', 'r') as docx_archive:
    doc_xml = docx_archive.read('word/document.xml')

In [3]:
# Build a BeautifulSoup tree from the raw document bytes with the 'xml' parser.
soup_xml = BeautifulSoup(markup=doc_xml, features='xml')

In [4]:
# Locate the document body element (first tag matching 'body').
body = soup_xml.find(name='body')

In [5]:
# Collect every w:p element found under the body.
lines = body.find_all(['w:p'])
# Shallow copy: a separate list whose entries are the very same tag objects
# as in `lines`, so editing a tag through one list is visible through the other.
lines_new = list(lines)

## Open original and shuffled data

In [6]:
# `df` holds the original line-number table, `df_new` the shuffled one.
df = pd.read_csv(filepath_or_buffer='Questions_to_shuffle.csv')
df_new = pd.read_csv(filepath_or_buffer='shuffled.csv')

## Construct lines_new list with new answers 

In [7]:
# 1. Take each new answer from [lines] and place it into [lines_new].
# 2. Relabel the placed answers (A. B. C. D.) so the labels are in the correct order.

def linegen(column_name, ABCD):
    """Move shuffled answer paragraphs into place and relabel them.

    For every row, the paragraph ``lines[df_new[column_name]]`` is written
    into ``lines_new`` at index ``df[column_name]``. Afterwards the first
    ``w:t`` element of each placed paragraph has its first three characters
    replaced by ``ABCD``.

    Reads the notebook globals ``df``, ``df_new`` and ``lines`` and mutates
    ``lines_new``. The tag stored in ``lines_new`` is the same object as the
    one in ``lines``, so the relabelling is visible through both lists.

    Parameters
    ----------
    column_name : str
        Column present in both ``df`` and ``df_new`` whose values are
        indices into ``lines``.
    ABCD : str
        Label written at the start of each placed answer, e.g. ``'A. '``.

    Raises
    ------
    ValueError
        If the two columns differ in length, or a placed paragraph has no
        ``w:t`` element to relabel.
    """
    # Cast BOTH columns to int. A column that pandas read as float
    # (e.g. 12.0) cannot be used as a list index otherwise.
    p_answers = df[column_name].astype(int).tolist()
    p_answers_new = df_new[column_name].astype(int).tolist()

    if len(p_answers) != len(p_answers_new):
        raise ValueError(
            f"{column_name}: original has {len(p_answers)} rows but "
            f"shuffled has {len(p_answers_new)}"
        )

    # Put each shuffled paragraph into the slot of the original answer.
    for i, n in enumerate(p_answers_new):
        lines_new[p_answers[i]] = lines[n]

    # Rewrite the label at the start of every placed paragraph.
    for n in p_answers:
        text_node = lines_new[n].find('w:t')
        if text_node is None:
            raise ValueError(f"{column_name}: paragraph {n} has no w:t element")

        string = unicodedata.normalize('NFKD', text_node.get_text())
        # Drop the first three characters (the old label, e.g. 'A. ')
        # and prefix the new label.
        text_node.string = ABCD + string[3:]

    print('Successfully replaced lines_new with',column_name,'and replaced to', ABCD)

### Reconstruct lines_new 

In [8]:
# One pass per answer slot: (line-number column, label to write).
for column_name, label in (
    ('answer1_line_nbr', 'A. '),
    ('answer2_line_nbr', 'B. '),
    ('answer3_line_nbr', 'C. '),
    ('answer4_line_nbr', 'D. '),
):
    linegen(column_name, label)

Successfully replaced lines_new with answer1_line_nbr and replaced to A. 
Successfully replaced lines_new with answer2_line_nbr and replaced to B. 
Successfully replaced lines_new with answer3_line_nbr and replaced to C. 
Successfully replaced lines_new with answer4_line_nbr and replaced to D. 


## Construct body_new

In [9]:
# `head` is everything up to and including the opening <w:body> tag.
body_open = '<w:body>'
xml_text = str(soup_xml)  # serialize once and reuse
# index() raises ValueError if the tag is absent; find() would return -1
# and silently produce a meaningless 7-character head.
n_head = xml_text.index(body_open) + len(body_open)
head = xml_text[:n_head]
head

'<?xml version="1.0" encoding="utf-8"?>\n<w:document mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh wp14" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:mc="http://schem

In [10]:
# `tail` is everything from the closing </w:body> tag to the end of the document.
xml_text = str(soup_xml)  # serialize once and reuse
# index() raises ValueError if the tag is absent; find() would return -1
# and silently reduce the tail to the last character.
n_tail = xml_text.index('</w:body>')
tail = xml_text[n_tail:]
tail

'</w:body></w:document>'

In [11]:
# Serialize the re-ordered paragraphs back to XML text.
body_new = ''.join(str(l) for l in lines_new)

# The body's trailing <w:sectPr> (section/page settings) is not a w:p element,
# so it is absent from lines_new. Append it so the rebuilt body keeps it.
# recursive=False limits the search to direct children of the body, so a
# sectPr nested inside a paragraph is not picked up here.
sect_pr = body.find('w:sectPr', recursive=False)
if sect_pr is not None:
    body_new += str(sect_pr)
# NOTE(review): any other non-paragraph children of the body are still not
# written out as such; confirm the form contains none.

In [12]:
# Stitch the document back together: prefix, rebuilt body content, closing tags.
soup_new = ''.join((head, body_new, tail))

In [39]:
# Copy every member of the source archive into a new archive, leaving out
# word/document.xml (the rebuilt version is written in the next cell).
# NOTE(review): the source here is 'FORM C.docx' while the XML above was read
# from 'FORM B.docx'; confirm that this is intended.
with ZipFile('FORM C.docx', 'r') as zipin, ZipFile('FORM D.docx', 'w') as zipout:
    zipout.comment = zipin.comment  # preserve the comment
    for entry in zipin.infolist():
        if entry.filename == 'word/document.xml':
            continue
        zipout.writestr(entry, zipin.read(entry.filename))

In [40]:
# Add the rebuilt XML as word/document.xml ('a' appends to the existing archive).
with ZipFile('FORM D.docx', mode='a') as newzip:
    newzip.writestr('word/document.xml', data=soup_new)